code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47
  48
  49 class LogicalUnit(object):
  50   """Logical Unit base class.
  51
  52   Subclasses must follow these rules:
  53     - implement ExpandNames
  54     - implement CheckPrereq (except when tasklets are used)
  55     - implement Exec (except when tasklets are used)
  56     - implement BuildHooksEnv
  57     - redefine HPATH and HTYPE
  58     - optionally redefine their run requirements:
  59         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  60
  61   Note that all commands require root permissions.
  62
  63   @ivar dry_run_result: the value (if any) that will be returned to the caller
  64       in dry-run mode (signalled by opcode dry_run parameter)
  65
  66   """
  67   HPATH = None
  68   HTYPE = None
  69   _OP_REQP = []
  70   REQ_BGL = True
  71
  72   def __init__(self, processor, op, context, rpc):
  73     """Constructor for LogicalUnit.
  74
  75     This needs to be overridden in derived classes in order to check op
  76     validity.
  77
  78     """
  79     self.proc = processor
  80     self.op = op
  81     self.cfg = context.cfg
  82     self.context = context
  83     self.rpc = rpc
  84     # Dicts used to declare locking needs to mcpu
  85     self.needed_locks = None
  86     self.acquired_locks = {}
  87     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  88     self.add_locks = {}
  89     self.remove_locks = {}
  90     # Used to force good behavior when calling helper functions
  91     self.recalculate_locks = {}
  92     self.__ssh = None
  93     # logging
  94     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  95     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  96     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  97     # support for dry-run
  98     self.dry_run_result = None
  99     # support for generic debug attribute
 100     if (not hasattr(self.op, "debug_level") or
 101         not isinstance(self.op.debug_level, int)):
 102       self.op.debug_level = 0
 103
 104     # Tasklets
 105     self.tasklets = None
 106
 107     for attr_name in self._OP_REQP:
 108       attr_val = getattr(op, attr_name, None)
 109       if attr_val is None:
 110         raise errors.OpPrereqError("Required parameter '%s' missing" %
 111                                    attr_name, errors.ECODE_INVAL)
 112
 113     self.CheckArguments()
 114
 115   def __GetSSH(self):
 116     """Returns the SshRunner object
 117
 118     """
 119     if not self.__ssh:
 120       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 121     return self.__ssh
 122
 123   ssh = property(fget=__GetSSH)
 124
 125   def CheckArguments(self):
 126     """Check syntactic validity for the opcode arguments.
 127
 128     This method is for doing a simple syntactic check and ensure
 129     validity of opcode parameters, without any cluster-related
 130     checks. While the same can be accomplished in ExpandNames and/or
 131     CheckPrereq, doing these separate is better because:
 132
 133       - ExpandNames is left as as purely a lock-related function
 134       - CheckPrereq is run after we have acquired locks (and possible
 135         waited for them)
 136
 137     The function is allowed to change the self.op attribute so that
 138     later methods can no longer worry about missing parameters.
 139
 140     """
 141     pass
 142
 143   def ExpandNames(self):
 144     """Expand names for this LU.
 145
 146     This method is called before starting to execute the opcode, and it should
 147     update all the parameters of the opcode to their canonical form (e.g. a
 148     short node name must be fully expanded after this method has successfully
 149     completed). This way locking, hooks, logging, ecc. can work correctly.
 150
 151     LUs which implement this method must also populate the self.needed_locks
 152     member, as a dict with lock levels as keys, and a list of needed lock names
 153     as values. Rules:
 154
 155       - use an empty dict if you don't need any lock
 156       - if you don't need any lock at a particular level omit that level
 157       - don't put anything for the BGL level
 158       - if you want all locks at a level use locking.ALL_SET as a value
 159
 160     If you need to share locks (rather than acquire them exclusively) at one
 161     level you can modify self.share_locks, setting a true value (usually 1) for
 162     that level. By default locks are not shared.
 163
 164     This function can also define a list of tasklets, which then will be
 165     executed in order instead of the usual LU-level CheckPrereq and Exec
 166     functions, if those are not defined by the LU.
 167
 168     Examples::
 169
 170       # Acquire all nodes and one instance
 171       self.needed_locks = {
 172         locking.LEVEL_NODE: locking.ALL_SET,
 173         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 174       }
 175       # Acquire just two nodes
 176       self.needed_locks = {
 177         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 178       }
 179       # Acquire no locks
 180       self.needed_locks = {} # No, you can't leave it to the default value None
 181
 182     """
 183     # The implementation of this method is mandatory only if the new LU is
 184     # concurrent, so that old LUs don't need to be changed all at the same
 185     # time.
 186     if self.REQ_BGL:
 187       self.needed_locks = {} # Exclusive LUs don't need locks.
 188     else:
 189       raise NotImplementedError
 190
 191   def DeclareLocks(self, level):
 192     """Declare LU locking needs for a level
 193
 194     While most LUs can just declare their locking needs at ExpandNames time,
 195     sometimes there's the need to calculate some locks after having acquired
 196     the ones before. This function is called just before acquiring locks at a
 197     particular level, but after acquiring the ones at lower levels, and permits
 198     such calculations. It can be used to modify self.needed_locks, and by
 199     default it does nothing.
 200
 201     This function is only called if you have something already set in
 202     self.needed_locks for the level.
 203
 204     @param level: Locking level which is going to be locked
 205     @type level: member of ganeti.locking.LEVELS
 206
 207     """
 208
 209   def CheckPrereq(self):
 210     """Check prerequisites for this LU.
 211
 212     This method should check that the prerequisites for the execution
 213     of this LU are fulfilled. It can do internode communication, but
 214     it should be idempotent - no cluster or system changes are
 215     allowed.
 216
 217     The method should raise errors.OpPrereqError in case something is
 218     not fulfilled. Its return value is ignored.
 219
 220     This method should also update all the parameters of the opcode to
 221     their canonical form if it hasn't been done by ExpandNames before.
 222
 223     """
 224     if self.tasklets is not None:
 225       for (idx, tl) in enumerate(self.tasklets):
 226         logging.debug("Checking prerequisites for tasklet %s/%s",
 227                       idx + 1, len(self.tasklets))
 228         tl.CheckPrereq()
 229     else:
 230       raise NotImplementedError
 231
 232   def Exec(self, feedback_fn):
 233     """Execute the LU.
 234
 235     This method should implement the actual work. It should raise
 236     errors.OpExecError for failures that are somewhat dealt with in
 237     code, or expected.
 238
 239     """
 240     if self.tasklets is not None:
 241       for (idx, tl) in enumerate(self.tasklets):
 242         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 243         tl.Exec(feedback_fn)
 244     else:
 245       raise NotImplementedError
 246
 247   def BuildHooksEnv(self):
 248     """Build hooks environment for this LU.
 249
 250     This method should return a three-node tuple consisting of: a dict
 251     containing the environment that will be used for running the
 252     specific hook for this LU, a list of node names on which the hook
 253     should run before the execution, and a list of node names on which
 254     the hook should run after the execution.
 255
 256     The keys of the dict must not have 'GANETI_' prefixed as this will
 257     be handled in the hooks runner. Also note additional keys will be
 258     added by the hooks runner. If the LU doesn't define any
 259     environment, an empty dict (and not None) should be returned.
 260
 261     No nodes should be returned as an empty list (and not None).
 262
 263     Note that if the HPATH for a LU class is None, this function will
 264     not be called.
 265
 266     """
 267     raise NotImplementedError
 268
 269   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 270     """Notify the LU about the results of its hooks.
 271
 272     This method is called every time a hooks phase is executed, and notifies
 273     the Logical Unit about the hooks' result. The LU can then use it to alter
 274     its result based on the hooks.  By default the method does nothing and the
 275     previous result is passed back unchanged but any LU can define it if it
 276     wants to use the local cluster hook-scripts somehow.
 277
 278     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 279         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 280     @param hook_results: the results of the multi-node hooks rpc call
 281     @param feedback_fn: function used send feedback back to the caller
 282     @param lu_result: the previous Exec result this LU had, or None
 283         in the PRE phase
 284     @return: the new Exec result, based on the previous result
 285         and hook results
 286
 287     """
 288     # API must be kept, thus we ignore the unused argument and could
 289     # be a function warnings
 290     # pylint: disable-msg=W0613,R0201
 291     return lu_result
 292
 293   def _ExpandAndLockInstance(self):
 294     """Helper function to expand and lock an instance.
 295
 296     Many LUs that work on an instance take its name in self.op.instance_name
 297     and need to expand it and then declare the expanded name for locking. This
 298     function does it, and then updates self.op.instance_name to the expanded
 299     name. It also initializes needed_locks as a dict, if this hasn't been done
 300     before.
 301
 302     """
 303     if self.needed_locks is None:
 304       self.needed_locks = {}
 305     else:
 306       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 307         "_ExpandAndLockInstance called with instance-level locks set"
 308     self.op.instance_name = _ExpandInstanceName(self.cfg,
 309                                                 self.op.instance_name)
 310     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 311
 312   def _LockInstancesNodes(self, primary_only=False):
 313     """Helper function to declare instances' nodes for locking.
 314
 315     This function should be called after locking one or more instances to lock
 316     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 317     with all primary or secondary nodes for instances already locked and
 318     present in self.needed_locks[locking.LEVEL_INSTANCE].
 319
 320     It should be called from DeclareLocks, and for safety only works if
 321     self.recalculate_locks[locking.LEVEL_NODE] is set.
 322
 323     In the future it may grow parameters to just lock some instance's nodes, or
 324     to just lock primaries or secondary nodes, if needed.
 325
 326     If should be called in DeclareLocks in a way similar to::
 327
 328       if level == locking.LEVEL_NODE:
 329         self._LockInstancesNodes()
 330
 331     @type primary_only: boolean
 332     @param primary_only: only lock primary nodes of locked instances
 333
 334     """
 335     assert locking.LEVEL_NODE in self.recalculate_locks, \
 336       "_LockInstancesNodes helper function called with no nodes to recalculate"
 337
 338     # TODO: check if we're really been called with the instance locks held
 339
 340     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 341     # future we might want to have different behaviors depending on the value
 342     # of self.recalculate_locks[locking.LEVEL_NODE]
 343     wanted_nodes = []
 344     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 345       instance = self.context.cfg.GetInstanceInfo(instance_name)
 346       wanted_nodes.append(instance.primary_node)
 347       if not primary_only:
 348         wanted_nodes.extend(instance.secondary_nodes)
 349
 350     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 351       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 352     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 353       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 354
 355     del self.recalculate_locks[locking.LEVEL_NODE]
 356
 357
 358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 359   """Simple LU which runs no hooks.
 360
 361   This LU is intended as a parent for other LogicalUnits which will
 362   run no hooks, in order to reduce duplicate code.
 363
 364   """
 365   HPATH = None
 366   HTYPE = None
 367
 368   def BuildHooksEnv(self):
 369     """Empty BuildHooksEnv for NoHooksLu.
 370
 371     This just raises an error.
 372
 373     """
 374     assert False, "BuildHooksEnv called for NoHooksLUs"
 375
 376
 377 class Tasklet:
 378   """Tasklet base class.
 379
 380   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 381   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 382   tasklets know nothing about locks.
 383
 384   Subclasses must follow these rules:
 385     - Implement CheckPrereq
 386     - Implement Exec
 387
 388   """
 389   def __init__(self, lu):
 390     self.lu = lu
 391
 392     # Shortcuts
 393     self.cfg = lu.cfg
 394     self.rpc = lu.rpc
 395
 396   def CheckPrereq(self):
 397     """Check prerequisites for this tasklets.
 398
 399     This method should check whether the prerequisites for the execution of
 400     this tasklet are fulfilled. It can do internode communication, but it
 401     should be idempotent - no cluster or system changes are allowed.
 402
 403     The method should raise errors.OpPrereqError in case something is not
 404     fulfilled. Its return value is ignored.
 405
 406     This method should also update all parameters to their canonical form if it
 407     hasn't been done before.
 408
 409     """
 410     raise NotImplementedError
 411
 412   def Exec(self, feedback_fn):
 413     """Execute the tasklet.
 414
 415     This method should implement the actual work. It should raise
 416     errors.OpExecError for failures that are somewhat dealt with in code, or
 417     expected.
 418
 419     """
 420     raise NotImplementedError
 421
 422
 423 def _GetWantedNodes(lu, nodes):
 424   """Returns list of checked and expanded node names.
 425
 426   @type lu: L{LogicalUnit}
 427   @param lu: the logical unit on whose behalf we execute
 428   @type nodes: list
 429   @param nodes: list of node names or None for all nodes
 430   @rtype: list
 431   @return: the list of nodes, sorted
 432   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 433
 434   """
 435   if not isinstance(nodes, list):
 436     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 437                                errors.ECODE_INVAL)
 438
 439   if not nodes:
 440     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 441       " non-empty list of nodes whose name is to be expanded.")
 442
 443   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 444   return utils.NiceSort(wanted)
 445
 446
 447 def _GetWantedInstances(lu, instances):
 448   """Returns list of checked and expanded instance names.
 449
 450   @type lu: L{LogicalUnit}
 451   @param lu: the logical unit on whose behalf we execute
 452   @type instances: list
 453   @param instances: list of instance names or None for all instances
 454   @rtype: list
 455   @return: the list of instances, sorted
 456   @raise errors.OpPrereqError: if the instances parameter is wrong type
 457   @raise errors.OpPrereqError: if any of the passed instances is not found
 458
 459   """
 460   if not isinstance(instances, list):
 461     raise errors.OpPrereqError("Invalid argument type 'instances'",
 462                                errors.ECODE_INVAL)
 463
 464   if instances:
 465     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 466   else:
 467     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 468   return wanted
 469
 470
 471 def _CheckOutputFields(static, dynamic, selected):
 472   """Checks whether all selected fields are valid.
 473
 474   @type static: L{utils.FieldSet}
 475   @param static: static fields set
 476   @type dynamic: L{utils.FieldSet}
 477   @param dynamic: dynamic fields set
 478
 479   """
 480   f = utils.FieldSet()
 481   f.Extend(static)
 482   f.Extend(dynamic)
 483
 484   delta = f.NonMatching(selected)
 485   if delta:
 486     raise errors.OpPrereqError("Unknown output fields selected: %s"
 487                                % ",".join(delta), errors.ECODE_INVAL)
 488
 489
 490 def _CheckBooleanOpField(op, name):
 491   """Validates boolean opcode parameters.
 492
 493   This will ensure that an opcode parameter is either a boolean value,
 494   or None (but that it always exists).
 495
 496   """
 497   val = getattr(op, name, None)
 498   if not (val is None or isinstance(val, bool)):
 499     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 500                                (name, str(val)), errors.ECODE_INVAL)
 501   setattr(op, name, val)
 502
 503
 504 def _CheckGlobalHvParams(params):
 505   """Validates that given hypervisor params are not global ones.
 506
 507   This will ensure that instances don't get customised versions of
 508   global params.
 509
 510   """
 511   used_globals = constants.HVC_GLOBALS.intersection(params)
 512   if used_globals:
 513     msg = ("The following hypervisor parameters are global and cannot"
 514            " be customized at instance level, please modify them at"
 515            " cluster level: %s" % utils.CommaJoin(used_globals))
 516     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 517
 518
 519 def _CheckNodeOnline(lu, node):
 520   """Ensure that a given node is online.
 521
 522   @param lu: the LU on behalf of which we make the check
 523   @param node: the node to check
 524   @raise errors.OpPrereqError: if the node is offline
 525
 526   """
 527   if lu.cfg.GetNodeInfo(node).offline:
 528     raise errors.OpPrereqError("Can't use offline node %s" % node,
 529                                errors.ECODE_INVAL)
 530
 531
 532 def _CheckNodeNotDrained(lu, node):
 533   """Ensure that a given node is not drained.
 534
 535   @param lu: the LU on behalf of which we make the check
 536   @param node: the node to check
 537   @raise errors.OpPrereqError: if the node is drained
 538
 539   """
 540   if lu.cfg.GetNodeInfo(node).drained:
 541     raise errors.OpPrereqError("Can't use drained node %s" % node,
 542                                errors.ECODE_INVAL)
 543
 544
 545 def _CheckDiskTemplate(template):
 546   """Ensure a given disk template is valid.
 547
 548   """
 549   if template not in constants.DISK_TEMPLATES:
 550     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 551            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 552     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 553
 554
 555 def _CheckInstanceDown(lu, instance, reason):
 556   """Ensure that an instance is not running."""
 557   if instance.admin_up:
 558     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 559                                (instance.name, reason), errors.ECODE_STATE)
 560
 561   pnode = instance.primary_node
 562   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 563   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 564               prereq=True, ecode=errors.ECODE_ENVIRON)
 565
 566   if instance.name in ins_l.payload:
 567     raise errors.OpPrereqError("Instance %s is running, %s" %
 568                                (instance.name, reason), errors.ECODE_STATE)
 569
 570
 571 def _ExpandItemName(fn, name, kind):
 572   """Expand an item name.
 573
 574   @param fn: the function to use for expansion
 575   @param name: requested item name
 576   @param kind: text description ('Node' or 'Instance')
 577   @return: the resolved (full) name
 578   @raise errors.OpPrereqError: if the item is not found
 579
 580   """
 581   full_name = fn(name)
 582   if full_name is None:
 583     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 584                                errors.ECODE_NOENT)
 585   return full_name
 586
 587
 588 def _ExpandNodeName(cfg, name):
 589   """Wrapper over L{_ExpandItemName} for nodes."""
 590   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 591
 592
 593 def _ExpandInstanceName(cfg, name):
 594   """Wrapper over L{_ExpandItemName} for instance."""
 595   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 596
 597
 598 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 599                           memory, vcpus, nics, disk_template, disks,
 600                           bep, hvp, hypervisor_name):
 601   """Builds instance related env variables for hooks
 602
 603   This builds the hook environment from individual variables.
 604
 605   @type name: string
 606   @param name: the name of the instance
 607   @type primary_node: string
 608   @param primary_node: the name of the instance's primary node
 609   @type secondary_nodes: list
 610   @param secondary_nodes: list of secondary nodes as strings
 611   @type os_type: string
 612   @param os_type: the name of the instance's OS
 613   @type status: boolean
 614   @param status: the should_run status of the instance
 615   @type memory: string
 616   @param memory: the memory size of the instance
 617   @type vcpus: string
 618   @param vcpus: the count of VCPUs the instance has
 619   @type nics: list
 620   @param nics: list of tuples (ip, mac, mode, link) representing
 621       the NICs the instance has
 622   @type disk_template: string
 623   @param disk_template: the disk template of the instance
 624   @type disks: list
 625   @param disks: the list of (size, mode) pairs
 626   @type bep: dict
 627   @param bep: the backend parameters for the instance
 628   @type hvp: dict
 629   @param hvp: the hypervisor parameters for the instance
 630   @type hypervisor_name: string
 631   @param hypervisor_name: the hypervisor for the instance
 632   @rtype: dict
 633   @return: the hook environment for this instance
 634
 635   """
 636   if status:
 637     str_status = "up"
 638   else:
 639     str_status = "down"
 640   env = {
 641     "OP_TARGET": name,
 642     "INSTANCE_NAME": name,
 643     "INSTANCE_PRIMARY": primary_node,
 644     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 645     "INSTANCE_OS_TYPE": os_type,
 646     "INSTANCE_STATUS": str_status,
 647     "INSTANCE_MEMORY": memory,
 648     "INSTANCE_VCPUS": vcpus,
 649     "INSTANCE_DISK_TEMPLATE": disk_template,
 650     "INSTANCE_HYPERVISOR": hypervisor_name,
 651   }
 652
 653   if nics:
 654     nic_count = len(nics)
 655     for idx, (ip, mac, mode, link) in enumerate(nics):
 656       if ip is None:
 657         ip = ""
 658       env["INSTANCE_NIC%d_IP" % idx] = ip
 659       env["INSTANCE_NIC%d_MAC" % idx] = mac
 660       env["INSTANCE_NIC%d_MODE" % idx] = mode
 661       env["INSTANCE_NIC%d_LINK" % idx] = link
 662       if mode == constants.NIC_MODE_BRIDGED:
 663         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 664   else:
 665     nic_count = 0
 666
 667   env["INSTANCE_NIC_COUNT"] = nic_count
 668
 669   if disks:
 670     disk_count = len(disks)
 671     for idx, (size, mode) in enumerate(disks):
 672       env["INSTANCE_DISK%d_SIZE" % idx] = size
 673       env["INSTANCE_DISK%d_MODE" % idx] = mode
 674   else:
 675     disk_count = 0
 676
 677   env["INSTANCE_DISK_COUNT"] = disk_count
 678
 679   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 680     for key, value in source.items():
 681       env["INSTANCE_%s_%s" % (kind, key)] = value
 682
 683   return env
 684
 685
 686 def _NICListToTuple(lu, nics):
 687   """Build a list of nic information tuples.
 688
 689   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 690   value in LUQueryInstanceData.
 691
 692   @type lu:  L{LogicalUnit}
 693   @param lu: the logical unit on whose behalf we execute
 694   @type nics: list of L{objects.NIC}
 695   @param nics: list of nics to convert to hooks tuples
 696
 697   """
 698   hooks_nics = []
 699   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 700   for nic in nics:
 701     ip = nic.ip
 702     mac = nic.mac
 703     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 704     mode = filled_params[constants.NIC_MODE]
 705     link = filled_params[constants.NIC_LINK]
 706     hooks_nics.append((ip, mac, mode, link))
 707   return hooks_nics
 708
 709
 710 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 711   """Builds instance related env variables for hooks from an object.
 712
 713   @type lu: L{LogicalUnit}
 714   @param lu: the logical unit on whose behalf we execute
 715   @type instance: L{objects.Instance}
 716   @param instance: the instance for which we should build the
 717       environment
 718   @type override: dict
 719   @param override: dictionary with key/values that will override
 720       our values
 721   @rtype: dict
 722   @return: the hook environment dictionary
 723
 724   """
 725   cluster = lu.cfg.GetClusterInfo()
 726   bep = cluster.FillBE(instance)
 727   hvp = cluster.FillHV(instance)
 728   args = {
 729     'name': instance.name,
 730     'primary_node': instance.primary_node,
 731     'secondary_nodes': instance.secondary_nodes,
 732     'os_type': instance.os,
 733     'status': instance.admin_up,
 734     'memory': bep[constants.BE_MEMORY],
 735     'vcpus': bep[constants.BE_VCPUS],
 736     'nics': _NICListToTuple(lu, instance.nics),
 737     'disk_template': instance.disk_template,
 738     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 739     'bep': bep,
 740     'hvp': hvp,
 741     'hypervisor_name': instance.hypervisor,
 742   }
 743   if override:
 744     args.update(override)
 745   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 746
 747
 748 def _AdjustCandidatePool(lu, exceptions):
 749   """Adjust the candidate pool after node operations.
 750
 751   """
 752   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 753   if mod_list:
 754     lu.LogInfo("Promoted nodes to master candidate role: %s",
 755                utils.CommaJoin(node.name for node in mod_list))
 756     for name in mod_list:
 757       lu.context.ReaddNode(name)
 758   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 759   if mc_now > mc_max:
 760     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 761                (mc_now, mc_max))
 762
 763
 764 def _DecideSelfPromotion(lu, exceptions=None):
 765   """Decide whether I should promote myself as a master candidate.
 766
 767   """
 768   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 769   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 770   # the new node will increase mc_max with one, so:
 771   mc_should = min(mc_should + 1, cp_size)
 772   return mc_now < mc_should
 773
 774
 775 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 776                                profile=constants.PP_DEFAULT):
 777   """Check that the brigdes needed by a list of nics exist.
 778
 779   """
 780   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 781   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 782                 for nic in target_nics]
 783   brlist = [params[constants.NIC_LINK] for params in paramslist
 784             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 785   if brlist:
 786     result = lu.rpc.call_bridges_exist(target_node, brlist)
 787     result.Raise("Error checking bridges on destination node '%s'" %
 788                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 789
 790
 791 def _CheckInstanceBridgesExist(lu, instance, node=None):
 792   """Check that the brigdes needed by an instance exist.
 793
 794   """
 795   if node is None:
 796     node = instance.primary_node
 797   _CheckNicsBridgesExist(lu, instance.nics, node)
 798
 799
 800 def _CheckOSVariant(os_obj, name):
 801   """Check whether an OS name conforms to the os variants specification.
 802
 803   @type os_obj: L{objects.OS}
 804   @param os_obj: OS object to check
 805   @type name: string
 806   @param name: OS name passed by the user, to check for validity
 807
 808   """
 809   if not os_obj.supported_variants:
 810     return
 811   try:
 812     variant = name.split("+", 1)[1]
 813   except IndexError:
 814     raise errors.OpPrereqError("OS name must include a variant",
 815                                errors.ECODE_INVAL)
 816
 817   if variant not in os_obj.supported_variants:
 818     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 819
 820
 821 def _GetNodeInstancesInner(cfg, fn):
 822   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 823
 824
 825 def _GetNodeInstances(cfg, node_name):
 826   """Returns a list of all primary and secondary instances on a node.
 827
 828   """
 829
 830   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 831
 832
 833 def _GetNodePrimaryInstances(cfg, node_name):
 834   """Returns primary instances on a node.
 835
 836   """
 837   return _GetNodeInstancesInner(cfg,
 838                                 lambda inst: node_name == inst.primary_node)
 839
 840
 841 def _GetNodeSecondaryInstances(cfg, node_name):
 842   """Returns secondary instances on a node.
 843
 844   """
 845   return _GetNodeInstancesInner(cfg,
 846                                 lambda inst: node_name in inst.secondary_nodes)
 847
 848
 849 def _GetStorageTypeArgs(cfg, storage_type):
 850   """Returns the arguments for a storage type.
 851
 852   """
 853   # Special case for file storage
 854   if storage_type == constants.ST_FILE:
 855     # storage.FileStorage wants a list of storage directories
 856     return [[cfg.GetFileStorageDir()]]
 857
 858   return []
 859
 860
 861 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 862   faulty = []
 863
 864   for dev in instance.disks:
 865     cfg.SetDiskID(dev, node_name)
 866
 867   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 868   result.Raise("Failed to get disk status from node %s" % node_name,
 869                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 870
 871   for idx, bdev_status in enumerate(result.payload):
 872     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 873       faulty.append(idx)
 874
 875   return faulty
 876
 877
 878 def _FormatTimestamp(secs):
 879   """Formats a Unix timestamp with the local timezone.
 880
 881   """
 882   return time.strftime("%F %T %Z", time.gmtime(secs))
 883
 884
 885 class LUPostInitCluster(LogicalUnit):
 886   """Logical unit for running hooks after cluster initialization.
 887
 888   """
 889   HPATH = "cluster-init"
 890   HTYPE = constants.HTYPE_CLUSTER
 891   _OP_REQP = []
 892
 893   def BuildHooksEnv(self):
 894     """Build hooks env.
 895
 896     """
 897     env = {"OP_TARGET": self.cfg.GetClusterName()}
 898     mn = self.cfg.GetMasterNode()
 899     return env, [], [mn]
 900
 901   def CheckPrereq(self):
 902     """No prerequisites to check.
 903
 904     """
 905     return True
 906
 907   def Exec(self, feedback_fn):
 908     """Nothing to do.
 909
 910     """
 911     return True
 912
 913
 914 class LUDestroyCluster(LogicalUnit):
 915   """Logical unit for destroying the cluster.
 916
 917   """
 918   HPATH = "cluster-destroy"
 919   HTYPE = constants.HTYPE_CLUSTER
 920   _OP_REQP = []
 921
 922   def BuildHooksEnv(self):
 923     """Build hooks env.
 924
 925     """
 926     env = {"OP_TARGET": self.cfg.GetClusterName()}
 927     return env, [], []
 928
 929   def CheckPrereq(self):
 930     """Check prerequisites.
 931
 932     This checks whether the cluster is empty.
 933
 934     Any errors are signaled by raising errors.OpPrereqError.
 935
 936     """
 937     master = self.cfg.GetMasterNode()
 938
 939     nodelist = self.cfg.GetNodeList()
 940     if len(nodelist) != 1 or nodelist[0] != master:
 941       raise errors.OpPrereqError("There are still %d node(s) in"
 942                                  " this cluster." % (len(nodelist) - 1),
 943                                  errors.ECODE_INVAL)
 944     instancelist = self.cfg.GetInstanceList()
 945     if instancelist:
 946       raise errors.OpPrereqError("There are still %d instance(s) in"
 947                                  " this cluster." % len(instancelist),
 948                                  errors.ECODE_INVAL)
 949
 950   def Exec(self, feedback_fn):
 951     """Destroys the cluster.
 952
 953     """
 954     master = self.cfg.GetMasterNode()
 955     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
 956
 957     # Run post hooks on master node before it's removed
 958     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
 959     try:
 960       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
 961     except:
 962       # pylint: disable-msg=W0702
 963       self.LogWarning("Errors occurred running hooks on %s" % master)
 964
 965     result = self.rpc.call_node_stop_master(master, False)
 966     result.Raise("Could not disable the master role")
 967
 968     if modify_ssh_setup:
 969       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 970       utils.CreateBackup(priv_key)
 971       utils.CreateBackup(pub_key)
 972
 973     return master
 974
 975
 976 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
 977                             warn_days=constants.SSL_CERT_EXPIRATION_WARN,
 978                             error_days=constants.SSL_CERT_EXPIRATION_ERROR):
 979   """Verifies certificate details for LUVerifyCluster.
 980
 981   """
 982   if expired:
 983     msg = "Certificate %s is expired" % filename
 984
 985     if not_before is not None and not_after is not None:
 986       msg += (" (valid from %s to %s)" %
 987               (_FormatTimestamp(not_before),
 988                _FormatTimestamp(not_after)))
 989     elif not_before is not None:
 990       msg += " (valid from %s)" % _FormatTimestamp(not_before)
 991     elif not_after is not None:
 992       msg += " (valid until %s)" % _FormatTimestamp(not_after)
 993
 994     return (LUVerifyCluster.ETYPE_ERROR, msg)
 995
 996   elif not_before is not None and not_before > now:
 997     return (LUVerifyCluster.ETYPE_WARNING,
 998             "Certificate %s not yet valid (valid from %s)" %
 999             (filename, _FormatTimestamp(not_before)))
1000
1001   elif not_after is not None:
1002     remaining_days = int((not_after - now) / (24 * 3600))
1003
1004     msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1005
1006     if remaining_days <= error_days:
1007       return (LUVerifyCluster.ETYPE_ERROR, msg)
1008
1009     if remaining_days <= warn_days:
1010       return (LUVerifyCluster.ETYPE_WARNING, msg)
1011
1012   return (None, None)
1013
1014
1015 def _VerifyCertificate(filename):
1016   """Verifies a certificate for LUVerifyCluster.
1017
1018   @type filename: string
1019   @param filename: Path to PEM file
1020
1021   """
1022   try:
1023     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1024                                            utils.ReadFile(filename))
1025   except Exception, err: # pylint: disable-msg=W0703
1026     return (LUVerifyCluster.ETYPE_ERROR,
1027             "Failed to load X509 certificate %s: %s" % (filename, err))
1028
1029   # Depending on the pyOpenSSL version, this can just return (None, None)
1030   (not_before, not_after) = utils.GetX509CertValidity(cert)
1031
1032   return _VerifyCertificateInner(filename, cert.has_expired(),
1033                                  not_before, not_after, time.time())
1034
1035
1036 class LUVerifyCluster(LogicalUnit):
1037   """Verifies the cluster status.
1038
1039   """
1040   HPATH = "cluster-verify"
1041   HTYPE = constants.HTYPE_CLUSTER
1042   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1043   REQ_BGL = False
1044
1045   TCLUSTER = "cluster"
1046   TNODE = "node"
1047   TINSTANCE = "instance"
1048
1049   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1050   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1051   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1052   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1053   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1054   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1055   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1056   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1057   ENODEDRBD = (TNODE, "ENODEDRBD")
1058   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1059   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1060   ENODEHV = (TNODE, "ENODEHV")
1061   ENODELVM = (TNODE, "ENODELVM")
1062   ENODEN1 = (TNODE, "ENODEN1")
1063   ENODENET = (TNODE, "ENODENET")
1064   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1065   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1066   ENODERPC = (TNODE, "ENODERPC")
1067   ENODESSH = (TNODE, "ENODESSH")
1068   ENODEVERSION = (TNODE, "ENODEVERSION")
1069   ENODESETUP = (TNODE, "ENODESETUP")
1070   ENODETIME = (TNODE, "ENODETIME")
1071
1072   ETYPE_FIELD = "code"
1073   ETYPE_ERROR = "ERROR"
1074   ETYPE_WARNING = "WARNING"
1075
1076   def ExpandNames(self):
1077     self.needed_locks = {
1078       locking.LEVEL_NODE: locking.ALL_SET,
1079       locking.LEVEL_INSTANCE: locking.ALL_SET,
1080     }
1081     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1082
1083   def _Error(self, ecode, item, msg, *args, **kwargs):
1084     """Format an error message.
1085
1086     Based on the opcode's error_codes parameter, either format a
1087     parseable error code, or a simpler error string.
1088
1089     This must be called only from Exec and functions called from Exec.
1090
1091     """
1092     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1093     itype, etxt = ecode
1094     # first complete the msg
1095     if args:
1096       msg = msg % args
1097     # then format the whole message
1098     if self.op.error_codes:
1099       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1100     else:
1101       if item:
1102         item = " " + item
1103       else:
1104         item = ""
1105       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1106     # and finally report it via the feedback_fn
1107     self._feedback_fn("  - %s" % msg)
1108
1109   def _ErrorIf(self, cond, *args, **kwargs):
1110     """Log an error message if the passed condition is True.
1111
1112     """
1113     cond = bool(cond) or self.op.debug_simulate_errors
1114     if cond:
1115       self._Error(*args, **kwargs)
1116     # do not mark the operation as failed for WARN cases only
1117     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1118       self.bad = self.bad or cond
1119
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    All problems are reported through L{_ErrorIf}; nothing is returned.
    The function returns early if the node result is empty/not a dict, if
    the version data is malformed or if the protocol versions differ.

    Test list:

      - compares ganeti protocol version (error) and release version
        (warning only)
      - checks vg existence and size (against constants.MIN_VG_SIZE)
      - checks config file checksum
      - checks ssh to other nodes
      - checks tcp connectivity to other nodes
      - checks the hypervisor verification results
      - checks the used drbd minors against the expected ones
      - checks the node setup results
      - checks that PV names do not contain ':'

    The volume group, drbd and PV checks are only done if vg_name is not
    None.

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version; the remote value must be a two-element
    # list/tuple, used below as (protocol version, release version)
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version (a mismatch is reported as a warning only)
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and minimum size (constants.MIN_VG_SIZE)
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        # vgstatus is used both as the error condition and as the message
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        # files in master_files are only required on master candidates
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any; every entry in the returned mapping is reported
    # as an error (key: other node, value: message)

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # checks tcp connectivity; as above, every returned entry is an error
    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    # hypervisor verification: any non-None per-hypervisor result is an
    # error; a missing or non-dict result is silently skipped
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        # expected minors which are not in use on the node
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        # minors in use on the node which are not in the map
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    # node setup: the result is a list of messages, a non-empty list
    # being an error; missing data is reported via the default value
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)
1282
1283   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1284                       node_instance, n_offline):
1285     """Verify an instance.
1286
1287     This function checks to see if the required block devices are
1288     available on the instance's node.
1289
1290     """
1291     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1292     node_current = instanceconfig.primary_node
1293
1294     node_vol_should = {}
1295     instanceconfig.MapLVsByNode(node_vol_should)
1296
1297     for node in node_vol_should:
1298       if node in n_offline:
1299         # ignore missing volumes on offline nodes
1300         continue
1301       for volume in node_vol_should[node]:
1302         test = node not in node_vol_is or volume not in node_vol_is[node]
1303         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1304                  "volume %s missing on node %s", volume, node)
1305
1306     if instanceconfig.admin_up:
1307       test = ((node_current not in node_instance or
1308                not instance in node_instance[node_current]) and
1309               node_current not in n_offline)
1310       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1311                "instance not running on its primary node %s",
1312                node_current)
1313
1314     for node in node_instance:
1315       if (not node == node_current):
1316         test = instance in node_instance[node]
1317         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1318                  "instance should not run on node %s", node)
1319
1320   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1321     """Verify if there are any unknown volumes in the cluster.
1322
1323     The .os, .swap and backup volumes are ignored. All other volumes are
1324     reported as unknown.
1325
1326     """
1327     for node in node_vol_is:
1328       for volume in node_vol_is[node]:
1329         test = (node not in node_vol_should or
1330                 volume not in node_vol_should[node])
1331         self._ErrorIf(test, self.ENODEORPHANLV, node,
1332                       "volume %s is unknown", volume)
1333
1334   def _VerifyOrphanInstances(self, instancelist, node_instance):
1335     """Verify the list of running instances.
1336
1337     This checks what instances are running but unknown to the cluster.
1338
1339     """
1340     for node in node_instance:
1341       for o_inst in node_instance[node]:
1342         test = o_inst not in instancelist
1343         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1344                       "instance %s on node %s should not exist", o_inst, node)
1345
1346   def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1347     """Verify N+1 Memory Resilience.
1348
1349     Check that if one single node dies we can still start all the instances it
1350     was primary for.
1351
1352     """
1353     for node, nodeinfo in node_info.iteritems():
1354       # This code checks that every node which is now listed as secondary has
1355       # enough memory to host all instances it is supposed to should a single
1356       # other node in the cluster fail.
1357       # FIXME: not ready for failover to an arbitrary node
1358       # FIXME: does not support file-backed instances
1359       # WARNING: we currently take into account down instances as well as up
1360       # ones, considering that even if they're down someone might want to start
1361       # them even in the event of a node failure.
1362       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1363         needed_mem = 0
1364         for instance in instances:
1365           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1366           if bep[constants.BE_AUTO_BALANCE]:
1367             needed_mem += bep[constants.BE_MEMORY]
1368         test = nodeinfo['mfree'] < needed_mem
1369         self._ErrorIf(test, self.ENODEN1, node,
1370                       "not enough memory on to accommodate"
1371                       " failovers should peer node %s fail", prinode)
1372
1373   def CheckPrereq(self):
1374     """Check prerequisites.
1375
1376     Transform the list of checks we're going to skip into a set and check that
1377     all its members are valid.
1378
1379     """
1380     self.skip_set = frozenset(self.op.skip_checks)
1381     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1382       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1383                                  errors.ECODE_INVAL)
1384
1385   def BuildHooksEnv(self):
1386     """Build hooks env.
1387
1388     Cluster-Verify hooks just ran in the post phase and their failure makes
1389     the output be logged in the verify output and the verification to fail.
1390
1391     """
1392     all_nodes = self.cfg.GetNodeList()
1393     env = {
1394       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1395       }
1396     for node in self.cfg.GetAllNodesInfo().values():
1397       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1398
1399     return env, [], all_nodes
1400
1401   def Exec(self, feedback_fn):
1402     """Verify integrity of cluster, performing various test on nodes.
1403
1404     """
1405     self.bad = False
1406     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1407     verbose = self.op.verbose
1408     self._feedback_fn = feedback_fn
1409     feedback_fn("* Verifying global settings")
1410     for msg in self.cfg.VerifyConfig():
1411       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1412
1413     # Check the cluster certificates
1414     for cert_filename in constants.ALL_CERT_FILES:
1415       (errcode, msg) = _VerifyCertificate(cert_filename)
1416       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1417
1418     vg_name = self.cfg.GetVGName()
1419     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1420     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1421     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1422     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1423     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1424                         for iname in instancelist)
1425     i_non_redundant = [] # Non redundant instances
1426     i_non_a_balanced = [] # Non auto-balanced instances
1427     n_offline = [] # List of offline nodes
1428     n_drained = [] # List of nodes being drained
1429     node_volume = {}
1430     node_instance = {}
1431     node_info = {}
1432     instance_cfg = {}
1433
1434     # FIXME: verify OS list
1435     # do local checksums
1436     master_files = [constants.CLUSTER_CONF_FILE]
1437
1438     file_names = ssconf.SimpleStore().GetFileList()
1439     file_names.extend(constants.ALL_CERT_FILES)
1440     file_names.extend(master_files)
1441
1442     local_checksums = utils.FingerprintFiles(file_names)
1443
1444     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1445     node_verify_param = {
1446       constants.NV_FILELIST: file_names,
1447       constants.NV_NODELIST: [node.name for node in nodeinfo
1448                               if not node.offline],
1449       constants.NV_HYPERVISOR: hypervisors,
1450       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1451                                   node.secondary_ip) for node in nodeinfo
1452                                  if not node.offline],
1453       constants.NV_INSTANCELIST: hypervisors,
1454       constants.NV_VERSION: None,
1455       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1456       constants.NV_NODESETUP: None,
1457       constants.NV_TIME: None,
1458       }
1459
1460     if vg_name is not None:
1461       node_verify_param[constants.NV_VGLIST] = None
1462       node_verify_param[constants.NV_LVLIST] = vg_name
1463       node_verify_param[constants.NV_PVLIST] = [vg_name]
1464       node_verify_param[constants.NV_DRBDLIST] = None
1465
1466     # Due to the way our RPC system works, exact response times cannot be
1467     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1468     # time before and after executing the request, we can at least have a time
1469     # window.
1470     nvinfo_starttime = time.time()
1471     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1472                                            self.cfg.GetClusterName())
1473     nvinfo_endtime = time.time()
1474
1475     cluster = self.cfg.GetClusterInfo()
1476     master_node = self.cfg.GetMasterNode()
1477     all_drbd_map = self.cfg.ComputeDRBDMap()
1478
1479     feedback_fn("* Verifying node status")
1480     for node_i in nodeinfo:
1481       node = node_i.name
1482
1483       if node_i.offline:
1484         if verbose:
1485           feedback_fn("* Skipping offline node %s" % (node,))
1486         n_offline.append(node)
1487         continue
1488
1489       if node == master_node:
1490         ntype = "master"
1491       elif node_i.master_candidate:
1492         ntype = "master candidate"
1493       elif node_i.drained:
1494         ntype = "drained"
1495         n_drained.append(node)
1496       else:
1497         ntype = "regular"
1498       if verbose:
1499         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1500
1501       msg = all_nvinfo[node].fail_msg
1502       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1503       if msg:
1504         continue
1505
1506       nresult = all_nvinfo[node].payload
1507       node_drbd = {}
1508       for minor, instance in all_drbd_map[node].items():
1509         test = instance not in instanceinfo
1510         _ErrorIf(test, self.ECLUSTERCFG, None,
1511                  "ghost instance '%s' in temporary DRBD map", instance)
1512           # ghost instance should not be running, but otherwise we
1513           # don't give double warnings (both ghost instance and
1514           # unallocated minor in use)
1515         if test:
1516           node_drbd[minor] = (instance, False)
1517         else:
1518           instance = instanceinfo[instance]
1519           node_drbd[minor] = (instance.name, instance.admin_up)
1520
1521       self._VerifyNode(node_i, file_names, local_checksums,
1522                        nresult, master_files, node_drbd, vg_name)
1523
1524       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1525       if vg_name is None:
1526         node_volume[node] = {}
1527       elif isinstance(lvdata, basestring):
1528         _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1529                  utils.SafeEncode(lvdata))
1530         node_volume[node] = {}
1531       elif not isinstance(lvdata, dict):
1532         _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1533         continue
1534       else:
1535         node_volume[node] = lvdata
1536
1537       # node_instance
1538       idata = nresult.get(constants.NV_INSTANCELIST, None)
1539       test = not isinstance(idata, list)
1540       _ErrorIf(test, self.ENODEHV, node,
1541                "rpc call to node failed (instancelist): %s",
1542                utils.SafeEncode(str(idata)))
1543       if test:
1544         continue
1545
1546       node_instance[node] = idata
1547
1548       # node_info
1549       nodeinfo = nresult.get(constants.NV_HVINFO, None)
1550       test = not isinstance(nodeinfo, dict)
1551       _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1552       if test:
1553         continue
1554
1555       # Node time
1556       ntime = nresult.get(constants.NV_TIME, None)
1557       try:
1558         ntime_merged = utils.MergeTime(ntime)
1559       except (ValueError, TypeError):
1560         _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1561
1562       if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1563         ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1564       elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1565         ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1566       else:
1567         ntime_diff = None
1568
1569       _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1570                "Node time diverges by at least %s from master node time",
1571                ntime_diff)
1572
1573       if ntime_diff is not None:
1574         continue
1575
1576       try:
1577         node_info[node] = {
1578           "mfree": int(nodeinfo['memory_free']),
1579           "pinst": [],
1580           "sinst": [],
1581           # dictionary holding all instances this node is secondary for,
1582           # grouped by their primary node. Each key is a cluster node, and each
1583           # value is a list of instances which have the key as primary and the
1584           # current node as secondary.  this is handy to calculate N+1 memory
1585           # availability if you can only failover from a primary to its
1586           # secondary.
1587           "sinst-by-pnode": {},
1588         }
1589         # FIXME: devise a free space model for file based instances as well
1590         if vg_name is not None:
1591           test = (constants.NV_VGLIST not in nresult or
1592                   vg_name not in nresult[constants.NV_VGLIST])
1593           _ErrorIf(test, self.ENODELVM, node,
1594                    "node didn't return data for the volume group '%s'"
1595                    " - it is either missing or broken", vg_name)
1596           if test:
1597             continue
1598           node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1599       except (ValueError, KeyError):
1600         _ErrorIf(True, self.ENODERPC, node,
1601                  "node returned invalid nodeinfo, check lvm/hypervisor")
1602         continue
1603
1604     node_vol_should = {}
1605
1606     feedback_fn("* Verifying instance status")
1607     for instance in instancelist:
1608       if verbose:
1609         feedback_fn("* Verifying instance %s" % instance)
1610       inst_config = instanceinfo[instance]
1611       self._VerifyInstance(instance, inst_config, node_volume,
1612                            node_instance, n_offline)
1613       inst_nodes_offline = []
1614
1615       inst_config.MapLVsByNode(node_vol_should)
1616
1617       instance_cfg[instance] = inst_config
1618
1619       pnode = inst_config.primary_node
1620       _ErrorIf(pnode not in node_info and pnode not in n_offline,
1621                self.ENODERPC, pnode, "instance %s, connection to"
1622                " primary node failed", instance)
1623       if pnode in node_info:
1624         node_info[pnode]['pinst'].append(instance)
1625
1626       if pnode in n_offline:
1627         inst_nodes_offline.append(pnode)
1628
1629       # If the instance is non-redundant we cannot survive losing its primary
1630       # node, so we are not N+1 compliant. On the other hand we have no disk
1631       # templates with more than one secondary so that situation is not well
1632       # supported either.
1633       # FIXME: does not support file-backed instances
1634       if len(inst_config.secondary_nodes) == 0:
1635         i_non_redundant.append(instance)
1636       _ErrorIf(len(inst_config.secondary_nodes) > 1,
1637                self.EINSTANCELAYOUT, instance,
1638                "instance has multiple secondary nodes", code="WARNING")
1639
1640       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1641         i_non_a_balanced.append(instance)
1642
1643       for snode in inst_config.secondary_nodes:
1644         _ErrorIf(snode not in node_info and snode not in n_offline,
1645                  self.ENODERPC, snode,
1646                  "instance %s, connection to secondary node"
1647                  " failed", instance)
1648
1649         if snode in node_info:
1650           node_info[snode]['sinst'].append(instance)
1651           if pnode not in node_info[snode]['sinst-by-pnode']:
1652             node_info[snode]['sinst-by-pnode'][pnode] = []
1653           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1654
1655         if snode in n_offline:
1656           inst_nodes_offline.append(snode)
1657
1658       # warn that the instance lives on offline nodes
1659       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1660                "instance lives on offline node(s) %s",
1661                utils.CommaJoin(inst_nodes_offline))
1662
1663     feedback_fn("* Verifying orphan volumes")
1664     self._VerifyOrphanVolumes(node_vol_should, node_volume)
1665
1666     feedback_fn("* Verifying remaining instances")
1667     self._VerifyOrphanInstances(instancelist, node_instance)
1668
1669     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1670       feedback_fn("* Verifying N+1 Memory redundancy")
1671       self._VerifyNPlusOneMemory(node_info, instance_cfg)
1672
1673     feedback_fn("* Other Notes")
1674     if i_non_redundant:
1675       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1676                   % len(i_non_redundant))
1677
1678     if i_non_a_balanced:
1679       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1680                   % len(i_non_a_balanced))
1681
1682     if n_offline:
1683       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1684
1685     if n_drained:
1686       feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1687
1688     return not self.bad
1689
1690   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1691     """Analyze the post-hooks' result
1692
1693     This method analyses the hook result, handles it, and sends some
1694     nicely-formatted feedback back to the user.
1695
1696     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1697         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1698     @param hooks_results: the results of the multi-node hooks rpc call
1699     @param feedback_fn: function used send feedback back to the caller
1700     @param lu_result: previous Exec result
1701     @return: the new Exec result, based on the previous result
1702         and hook results
1703
1704     """
1705     # We only really run POST phase hooks, and are only interested in
1706     # their results
1707     if phase == constants.HOOKS_PHASE_POST:
1708       # Used to change hooks' output to proper indentation
1709       indent_re = re.compile('^', re.M)
1710       feedback_fn("* Hooks Results")
1711       assert hooks_results, "invalid result from hooks"
1712
1713       for node_name in hooks_results:
1714         res = hooks_results[node_name]
1715         msg = res.fail_msg
1716         test = msg and not res.offline
1717         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1718                       "Communication failure in hooks execution: %s", msg)
1719         if res.offline or msg:
1720           # No need to investigate payload if node is offline or gave an error.
1721           # override manually lu_result here as _ErrorIf only
1722           # overrides self.bad
1723           lu_result = 1
1724           continue
1725         for script, hkr, output in res.payload:
1726           test = hkr == constants.HKR_FAIL
1727           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1728                         "Script %s failed, output:", script)
1729           if test:
1730             output = indent_re.sub('      ', output)
1731             feedback_fn("%s" % output)
1732             lu_result = 0
1733
1734       return lu_result
1735
1736
class LUVerifyDisks(NoHooksLU):
  """Logical unit checking the status of the cluster disks.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, every level in shared mode.

    """
    self.needed_locks = dict.fromkeys([locking.LEVEL_NODE,
                                       locking.LEVEL_INSTANCE],
                                      locking.ALL_SET)
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network-mirrored disk
    template are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict mapping node name to the error met while
        listing its volumes, list of names of instances which need
        activate-disks, dict mapping instance name to the list of
        (node, volume) pairs which are missing)

    """
    failed_nodes = {}
    need_activation = []
    missing_lvs = {}
    outcome = (failed_nodes, need_activation, missing_lvs)

    vg_name = self.cfg.GetVGName()
    node_names = utils.NiceSort(self.cfg.GetNodeList())
    all_instances = []
    for iname in self.cfg.GetInstanceList():
      all_instances.append(self.cfg.GetInstanceInfo(iname))

    # map every expected (node, volume) pair to the instance owning it
    expected = {}
    for instance in all_instances:
      if (instance.admin_up and
          instance.disk_template in constants.DTS_NET_MIRROR):
        lvs_by_node = {}
        instance.MapLVsByNode(lvs_by_node)
        for node_name, volumes in lvs_by_node.items():
          for volume in volumes:
            expected[(node_name, volume)] = instance

    if not expected:
      # nothing to look for, so the nodes are not queried at all
      return outcome

    lv_lists = self.rpc.call_lv_list(node_names, vg_name)

    for node_name in node_names:
      node_res = lv_lists[node_name]
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s",
                        node_name, err)
        failed_nodes[node_name] = err
        continue

      for lv_name, lv_attrs in node_res.payload.items():
        _, _, lv_online = lv_attrs
        # every volume seen on the node is struck off the expected set
        owner = expected.pop((node_name, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # whatever is still expected was not reported by any queried node
    for lv_key, owner in expected.items():
      missing_lvs.setdefault(owner.name, []).append(lv_key)

    return outcome
1818
1819
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The disk sizes recorded in the configuration are compared with the
  sizes reported by the primary node of each instance, and mismatching
  records are corrected in the configuration.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the needed locks.

    With an explicit instance list only those instances are locked and the
    node locks are computed later (see L{DeclareLocks}); with an empty list
    all instances and nodes are locked. All locks are shared.

    @raise errors.OpPrereqError: if C{instances} is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # None means "all instances", resolved in CheckPrereq
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the recorded size of the first child, at any level,
        was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one for
        each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # the per-disk sizes are the payload of the RPC result; this is the
      # attribute read from every other RPC result in this module
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # divide by 2**20 (presumably bytes to mebibytes, the unit of
        # disk.size - TODO confirm)
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
1938
1939
class LURenameCluster(LogicalUnit):
  """Logical unit renaming the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: the tuple (environment, [master node], all nodes)

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    master = self.cfg.GetMasterNode()
    return env, [master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the request is refused when neither the
    name nor the IP would change, or when the new IP already answers on
    the node daemon port.

    """
    resolved = utils.GetHostInfo(self.op.name)

    self.ip = resolved.ip
    name_changed = resolved.name != self.cfg.GetClusterName()
    ip_changed = resolved.ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and utils.TcpPing(resolved.ip,
                                    constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 resolved.ip, errors.ECODE_NOTUNIQUE)

    # from now on work with the resolved name
    self.op.name = resolved.name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped for the duration of the change and is
    started again even if the change fails.

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and push it to all the other nodes
      known_hosts = constants.SSH_KNOWN_HOSTS_FILE
      ssh.WriteKnownHostsFile(self.cfg, known_hosts)
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)
      upload = self.rpc.call_upload_file(targets, known_hosts)
      for to_node, to_result in upload.items():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (known_hosts, to_node, err))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_err = start_result.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_err)
2022
2023
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is a logical volume.

  The children are examined (depth first) before the disk itself.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True

  return disk.dev_type == constants.LD_LV
2038
2039
2040 class LUSetClusterParams(LogicalUnit):
2041   """Change the parameters of the cluster.
2042
2043   """
2044   HPATH = "cluster-modify"
2045   HTYPE = constants.HTYPE_CLUSTER
2046   _OP_REQP = []
2047   REQ_BGL = False
2048
2049   def CheckArguments(self):
2050     """Check parameters
2051
2052     """
2053     if not hasattr(self.op, "candidate_pool_size"):
2054       self.op.candidate_pool_size = None
2055     if self.op.candidate_pool_size is not None:
2056       try:
2057         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2058       except (ValueError, TypeError), err:
2059         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2060                                    str(err), errors.ECODE_INVAL)
2061       if self.op.candidate_pool_size < 1:
2062         raise errors.OpPrereqError("At least one master candidate needed",
2063                                    errors.ECODE_INVAL)
2064
2065   def ExpandNames(self):
2066     # FIXME: in the future maybe other cluster params won't require checking on
2067     # all nodes to be modified.
2068     self.needed_locks = {
2069       locking.LEVEL_NODE: locking.ALL_SET,
2070     }
2071     self.share_locks[locking.LEVEL_NODE] = 1
2072
2073   def BuildHooksEnv(self):
2074     """Build hooks env.
2075
2076     """
2077     env = {
2078       "OP_TARGET": self.cfg.GetClusterName(),
2079       "NEW_VG_NAME": self.op.vg_name,
2080       }
2081     mn = self.cfg.GetMasterNode()
2082     return env, [mn], [mn]
2083
2084   def CheckPrereq(self):
2085     """Check prerequisites.
2086
2087     This checks whether the given params don't conflict and
2088     if the given volume group is valid.
2089
2090     """
2091     if self.op.vg_name is not None and not self.op.vg_name:
2092       instances = self.cfg.GetAllInstancesInfo().values()
2093       for inst in instances:
2094         for disk in inst.disks:
2095           if _RecursiveCheckIfLVMBased(disk):
2096             raise errors.OpPrereqError("Cannot disable lvm storage while"
2097                                        " lvm-based instances exist",
2098                                        errors.ECODE_INVAL)
2099
2100     node_list = self.acquired_locks[locking.LEVEL_NODE]
2101
2102     # if vg_name not None, checks given volume group on all nodes
2103     if self.op.vg_name:
2104       vglist = self.rpc.call_vg_list(node_list)
2105       for node in node_list:
2106         msg = vglist[node].fail_msg
2107         if msg:
2108           # ignoring down node
2109           self.LogWarning("Error while gathering data on node %s"
2110                           " (ignoring node): %s", node, msg)
2111           continue
2112         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2113                                               self.op.vg_name,
2114                                               constants.MIN_VG_SIZE)
2115         if vgstatus:
2116           raise errors.OpPrereqError("Error on node '%s': %s" %
2117                                      (node, vgstatus), errors.ECODE_ENVIRON)
2118
2119     self.cluster = cluster = self.cfg.GetClusterInfo()
2120     # validate params changes
2121     if self.op.beparams:
2122       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2123       self.new_beparams = objects.FillDict(
2124         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2125
2126     if self.op.nicparams:
2127       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2128       self.new_nicparams = objects.FillDict(
2129         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2130       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2131       nic_errors = []
2132
2133       # check all instances for consistency
2134       for instance in self.cfg.GetAllInstancesInfo().values():
2135         for nic_idx, nic in enumerate(instance.nics):
2136           params_copy = copy.deepcopy(nic.nicparams)
2137           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2138
2139           # check parameter syntax
2140           try:
2141             objects.NIC.CheckParameterSyntax(params_filled)
2142           except errors.ConfigurationError, err:
2143             nic_errors.append("Instance %s, nic/%d: %s" %
2144                               (instance.name, nic_idx, err))
2145
2146           # if we're moving instances to routed, check that they have an ip
2147           target_mode = params_filled[constants.NIC_MODE]
2148           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2149             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2150                               (instance.name, nic_idx))
2151       if nic_errors:
2152         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2153                                    "\n".join(nic_errors))
2154
2155     # hypervisor list/parameters
2156     self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2157     if self.op.hvparams:
2158       if not isinstance(self.op.hvparams, dict):
2159         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2160                                    errors.ECODE_INVAL)
2161       for hv_name, hv_dict in self.op.hvparams.items():
2162         if hv_name not in self.new_hvparams:
2163           self.new_hvparams[hv_name] = hv_dict
2164         else:
2165           self.new_hvparams[hv_name].update(hv_dict)
2166
2167     # os hypervisor parameters
2168     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2169     if self.op.os_hvp:
2170       if not isinstance(self.op.os_hvp, dict):
2171         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2172                                    errors.ECODE_INVAL)
2173       for os_name, hvs in self.op.os_hvp.items():
2174         if not isinstance(hvs, dict):
2175           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2176                                       " input"), errors.ECODE_INVAL)
2177         if os_name not in self.new_os_hvp:
2178           self.new_os_hvp[os_name] = hvs
2179         else:
2180           for hv_name, hv_dict in hvs.items():
2181             if hv_name not in self.new_os_hvp[os_name]:
2182               self.new_os_hvp[os_name][hv_name] = hv_dict
2183             else:
2184               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2185
2186     if self.op.enabled_hypervisors is not None:
2187       self.hv_list = self.op.enabled_hypervisors
2188       if not self.hv_list:
2189         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2190                                    " least one member",
2191                                    errors.ECODE_INVAL)
2192       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2193       if invalid_hvs:
2194         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2195                                    " entries: %s" %
2196                                    utils.CommaJoin(invalid_hvs),
2197                                    errors.ECODE_INVAL)
2198     else:
2199       self.hv_list = cluster.enabled_hypervisors
2200
2201     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2202       # either the enabled list has changed, or the parameters have, validate
2203       for hv_name, hv_params in self.new_hvparams.items():
2204         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2205             (self.op.enabled_hypervisors and
2206              hv_name in self.op.enabled_hypervisors)):
2207           # either this is a new hypervisor, or its parameters have changed
2208           hv_class = hypervisor.GetHypervisor(hv_name)
2209           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2210           hv_class.CheckParameterSyntax(hv_params)
2211           _CheckHVParams(self, node_list, hv_name, hv_params)
2212
2213     if self.op.os_hvp:
2214       # no need to check any newly-enabled hypervisors, since the
2215       # defaults have already been checked in the above code-block
2216       for os_name, os_hvp in self.new_os_hvp.items():
2217         for hv_name, hv_params in os_hvp.items():
2218           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2219           # we need to fill in the new os_hvp on top of the actual hv_p
2220           cluster_defaults = self.new_hvparams.get(hv_name, {})
2221           new_osp = objects.FillDict(cluster_defaults, hv_params)
2222           hv_class = hypervisor.GetHypervisor(hv_name)
2223           hv_class.CheckParameterSyntax(new_osp)
2224           _CheckHVParams(self, node_list, hv_name, new_osp)
2225
2226
2227   def Exec(self, feedback_fn):
2228     """Change the parameters of the cluster.
2229
2230     """
2231     if self.op.vg_name is not None:
2232       new_volume = self.op.vg_name
2233       if not new_volume:
2234         new_volume = None
2235       if new_volume != self.cfg.GetVGName():
2236         self.cfg.SetVGName(new_volume)
2237       else:
2238         feedback_fn("Cluster LVM configuration already in desired"
2239                     " state, not changing")
2240     if self.op.hvparams:
2241       self.cluster.hvparams = self.new_hvparams
2242     if self.op.os_hvp:
2243       self.cluster.os_hvp = self.new_os_hvp
2244     if self.op.enabled_hypervisors is not None:
2245       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2246     if self.op.beparams:
2247       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2248     if self.op.nicparams:
2249       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2250
2251     if self.op.candidate_pool_size is not None:
2252       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2253       # we need to update the pool size here, otherwise the save will fail
2254       _AdjustCandidatePool(self, [])
2255
2256     self.cfg.Update(self.cluster, feedback_fn)
2257
2258
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the extra cluster-wide files to the other nodes.

  ConfigWriter distributes the config and ssconf files itself; the files
  handled here are the remaining ones which must be kept identical on
  the nodes, including those declared by the enabled hypervisors.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: online nodes plus the extra ones, minus the
  # master itself
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master_info.name)
  except ValueError:
    # the master was not among the targets
    pass

  # 2. Gather files to distribute
  to_copy = set([constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.HMAC_CLUSTER_KEY,
                ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    to_copy.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload, skipping files absent from this machine
  for fname in to_copy:
    if not os.path.exists(fname):
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        # a failed copy is only reported, it does not abort the rest
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2301
2302
class LURedistributeConfig(NoHooksLU):
  """Logical unit forcing a redistribution of the cluster configuration.

  It has no arguments and no prerequisites.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is passed unchanged to the configuration update,
    then the ancillary files are copied as well.

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
2329
2330
2331 def _WaitForSync(lu, instance, oneshot=False):
2332   """Sleep and poll for an instance's disk to sync.
2333
2334   """
2335   if not instance.disks:
2336     return True
2337
2338   if not oneshot:
2339     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2340
2341   node = instance.primary_node
2342
2343   for dev in instance.disks:
2344     lu.cfg.SetDiskID(dev, node)
2345
2346   # TODO: Convert to utils.Retry
2347
2348   retries = 0
2349   degr_retries = 10 # in seconds, as we sleep 1 second each time
2350   while True:
2351     max_time = 0
2352     done = True
2353     cumul_degraded = False
2354     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2355     msg = rstats.fail_msg
2356     if msg:
2357       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2358       retries += 1
2359       if retries >= 10:
2360         raise errors.RemoteError("Can't contact node %s for mirror data,"
2361                                  " aborting." % node)
2362       time.sleep(6)
2363       continue
2364     rstats = rstats.payload
2365     retries = 0
2366     for i, mstat in enumerate(rstats):
2367       if mstat is None:
2368         lu.LogWarning("Can't compute data for node %s/%s",
2369                            node, instance.disks[i].iv_name)
2370         continue
2371
2372       cumul_degraded = (cumul_degraded or
2373                         (mstat.is_degraded and mstat.sync_percent is None))
2374       if mstat.sync_percent is not None:
2375         done = False
2376         if mstat.estimated_time is not None:
2377           rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2378           max_time = mstat.estimated_time
2379         else:
2380           rem_time = "no time estimate"
2381         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2382                         (instance.disks[i].iv_name, mstat.sync_percent,
2383                          rem_time))
2384
2385     # if we're done but degraded, let's do a few small retries, to
2386     # make sure we see a stable and not transient situation; therefore
2387     # we force restart of the loop
2388     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2389       logging.info("Degraded disks found, %d retries left", degr_retries)
2390       degr_retries -= 1
2391       time.sleep(1)
2392       continue
2393
2394     if done or oneshot:
2395       break
2396
2397     time.sleep(min(60, max_time))
2398
2399   if done:
2400     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2401   return not cumul_degraded
2402
2403
2404 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2405   """Check that mirrors are not degraded.
2406
2407   The ldisk parameter, if True, will change the test from the
2408   is_degraded attribute (which represents overall non-ok status for
2409   the device(s)) to the ldisk (representing the local storage status).
2410
2411   """
2412   lu.cfg.SetDiskID(dev, node)
2413
2414   result = True
2415
2416   if on_primary or dev.AssembleOnSecondary():
2417     rstats = lu.rpc.call_blockdev_find(node, dev)
2418     msg = rstats.fail_msg
2419     if msg:
2420       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2421       result = False
2422     elif not rstats.payload:
2423       lu.LogWarning("Can't find disk on node %s", node)
2424       result = False
2425     else:
2426       if ldisk:
2427         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2428       else:
2429         result = result and not rstats.payload.is_degraded
2430
2431   if dev.children:
2432     for child in dev.children:
2433       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2434
2435   return result
2436
2437
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query and declare the needed locks.

    No locks are currently requested, see the TODO below.

    @raise errors.OpPrereqError: if specific OS names were passed, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as values;
        the payload of each result is a list of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [...]),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [...])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # first time we see this os: give every good node an empty
          # list, so that nodes lacking the os show up with no entries
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: not used by this LU
    @rtype: list
    @return: one row per OS reported by the online nodes, each row
        holding the values of the requested output fields, in the
        requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # "and" returns one of its operands, so without the explicit
          # conversion an empty per-node list would end up as the value
          # of the "valid" field instead of False
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # only the variants supported by all the nodes are reported
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a (shallow) copy of the dict
          val = dict(os_data)
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2551
2552
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of the returned node lists:
    running the pre phase on it would make a failed node impossible
    to remove.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The node to be removed must:
     - exist in the configuration
     - not be the master node
     - not be used by any instance

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.node = node
    self.op.node_name = node.name

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures of the post hooks or of the remote "leave cluster" call
    are only logged as warnings.

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # cluster-wide setting, handed over to the leaving node below
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)
2639
2640
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields whose values are taken from the node_info RPC result
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (in shared mode) are only requested if non-static
    fields were selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: not used by this LU
    @rtype: list
    @return: one row per node (in NiceSort order of the node names),
        each row holding the values of the requested output fields, in
        the requested order
    @raise errors.OpExecError: if, when running without locks, some of
        the explicitly requested nodes are no longer in the configuration

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          # failed or empty answer: all the dynamic fields will be None
          live_data[name] = {}
    else:
      # one fresh dict per node; dict.fromkeys(nodenames, {}) would make
      # all the nodes share the very same dict object
      live_data = dict([(name, {}) for name in nodenames])

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # the instance data is only read if an instance field was requested
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # the first matching state wins: master, then master candidate,
          # then drained, then offline, otherwise a regular node
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
2800
2801
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    The locks cover the given nodes or, if none were given, all nodes.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is verified here; the names of the locked nodes are stored
    for use in Exec.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the nodes and their attributes.

    Offline nodes are skipped silently, nodes failing the RPC are
    skipped with a warning. All the values are returned as strings.

    """
    node_names = self.nodes
    vol_results = self.rpc.call_node_volumes(node_names)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # per-instance map of node name to the LVs of the instance on it
    lvs_of = {}
    for inst in instances:
      lvs_of[inst] = inst.MapLVsByNode()

    rows = []
    for node in node_names:
      nres = vol_results[node]
      if nres.offline:
        continue
      err = nres.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      for vol in sorted(nres.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # the first instance owning the volume wins, '-' if none does
            val = '-'
            for inst in instances:
              node_lvs = lvs_of[inst]
              if node in node_lvs and vol['name'] in node_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
2886
2887
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate the storage type and fields, request shared node locks.

    The locks cover the given nodes or, if none were given, all nodes.

    """
    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is verified here; the optional "name" opcode field gets
    its default (None) and the names of the locked nodes are stored
    for use in Exec.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes failing the RPC are
    skipped with a warning.

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # position of each remotely queried field in the returned rows
    field_idx = {}
    for idx, fname in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      # index the rows by storage unit name, for sorted output
      by_name = {}
      for row in nresult.payload:
        by_name[row[name_idx]] = row

      for unit_name in utils.NiceSort(by_name.keys()):
        row = by_name[unit_name]

        out = []

        for field in requested:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
2983
2984
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    @raise errors.OpPrereqError: if the storage type is unknown

    """
    # the expanded name must be stored back on the opcode: ExpandNames
    # and Exec both read self.op.node_name for the lock and the RPC call
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock on the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that storage units of the given type can be modified
    at all, and that only modifiable fields are being changed.

    @raise errors.OpPrereqError: if any of the two checks fails

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # requested changes which are not allowed for this storage type
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    The failure of the remote call is reported through the Raise
    method of the RPC result.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3035
3036
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU also handles re-adding a node that is already in the
  configuration (self.op.readd).

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    """Normalize the node name given in the opcode.

    """
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    # primary_ip and secondary_ip are stored on the opcode by CheckPrereq
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add,
       that it is, and with the same IP addresses as before)
     - it is resolvable
     - its IP addresses are not used by any other node
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    # the primary IP always comes from the name resolution and is
    # stored on the opcode (BuildHooksEnv reads it from there)
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # no secondary IP given: treat the node as single-homed
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # the node being re-added must keep its IPs, and is exempt from
        # the conflict check below
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      # neither of the new IPs may be in use, as primary or secondary,
      # by another node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # NOTE(review): presumably the re-added node is passed as an exception
    # so that it is not counted among the current candidates - confirm
    # against _DecideSelfPromotion
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      # re-add: reuse the node object already in the configuration
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    @param feedback_fn: function used to report the details of a failed
        ssh/hostname verification
    @raise errors.OpExecError: on protocol version mismatch, if the
        node doesn't have the given secondary ip, or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # the order matters: the contents are passed positionally to
      # call_node_add below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts (only if the cluster is configured to
    # modify it)
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # for dual-homed nodes, ask the node itself whether it really owns
    # the secondary ip
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # have the master node verify the new node (NV_NODELIST check)
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        # a non-empty payload holds the failures; report each of them
        # before aborting
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      # the new node is not in the configuration yet, so it has to be
      # named explicitly as a target of the file distribution
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3259
3260
class LUSetNodeParams(LogicalUnit):
  """Modifies the role flags of a node.

  The flags handled are C{master_candidate}, C{offline} and C{drained};
  at least one of them has to be passed, and at most one of them may be
  set to True by a single opcode.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested flags and derive the locking needs.

    Besides checking the opcode fields, this computes the helper
    attributes C{offline_or_drain}, C{deoffline_or_drain},
    C{might_demote} and C{lock_all} used by the later phases.

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)
    for flag_name in ('master_candidate', 'offline', 'drained',
                      'auto_promote'):
      _CheckBooleanOpField(op, flag_name)

    requested = [op.offline, op.master_candidate, op.drained]
    if requested.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if requested.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Are we putting the node offline, or draining it?
    self.offline_or_drain = (op.offline == True or op.drained == True)
    # Are we clearing the offline or the drained flag?
    self.deoffline_or_drain = (op.offline == False or op.drained == False)
    # Explicit demotion, offlining and draining can all cost the node its
    # master candidate role
    self.might_demote = (op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = op.auto_promote and self.might_demote

  def ExpandNames(self):
    """Lock either all nodes or only the target node.

    All nodes are locked only when C{lock_all} was computed as true in
    L{CheckArguments}.

    """
    if self.lock_all:
      node_locks = locking.ALL_SET
    else:
      node_locks = self.op.node_name
    self.needed_locks = {locking.LEVEL_NODE: node_locks}

  def BuildHooksEnv(self):
    """Build hooks env.

    The same node list (the master node plus the target node) is
    returned for both node-list slots of the result.

    """
    op = self.op
    hook_env = {
      "OP_TARGET": op.node_name,
      "MASTER_CANDIDATE": str(op.master_candidate),
      "OFFLINE": str(op.offline),
      "DRAINED": str(op.drained),
      }
    hook_nodes = [self.cfg.GetMasterNode(), op.node_name]
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This refuses flag changes on the master node, checks that a possible
    demotion leaves enough master candidates (unless all nodes are
    locked), refuses promoting an offline or drained node, and decides
    whether a node being de-offlined/de-drained should promote itself.

    """
    op = self.op
    node = self.node = self.cfg.GetNodeInfo(op.node_name)

    flags_touched = not (op.master_candidate is None and
                         op.drained is None and
                         op.offline is None)
    # we can't change the master's node flags
    if flags_touched and op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover",
                                 errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      mc_stats = self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      (mc_remaining, mc_should, _) = mc_stats
      if mc_remaining != mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if op.master_candidate == True:
      # the node keeps a blocking flag unless this opcode clears it
      stays_offline = node.offline and op.offline != False
      stays_drained = node.drained and op.drained != False
      if stays_offline or stays_drained:
        raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                   " to master_candidate" % node.name,
                                   errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    wants_self_promotion = (self.deoffline_or_drain and
                            not self.offline_or_drain and
                            op.master_candidate != True and
                            not node.master_candidate)
    if wants_self_promotion:
      op.master_candidate = _DecideSelfPromotion(self)
      if op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter, description) pairs, one for each
        change applied to the node

    """
    op = self.op
    node = self.node

    changes = []
    mc_changed = False

    if op.offline is not None:
      node.offline = op.offline
      changes.append(("offline", str(op.offline)))
      if op.offline == True:
        # going offline also clears the other two flags
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          changes.append(("drained", "clear drained status due to offline"))

    if op.master_candidate is not None:
      node.master_candidate = op.master_candidate
      mc_changed = True
      changes.append(("master_candidate", str(op.master_candidate)))
      if op.master_candidate == False:
        demote_msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if demote_msg:
          self.LogWarning("Node failed to demote itself: %s" % demote_msg)

    if op.drained is not None:
      node.drained = op.drained
      changes.append(("drained", str(op.drained)))
      if op.drained == True:
        # draining also clears the other two flags
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to drain"))
          demote_msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
          if demote_msg:
            self.LogWarning("Node failed to demote itself: %s" % demote_msg)
        if node.offline:
          node.offline = False
          changes.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if mc_changed:
      self.context.ReaddNode(node)

    return changes
3422
3423
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    Powercycling the master node is refused unless the C{force}
    parameter is set.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name
    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort operation which must not wait for
    other jobs, hence no locks at all are requested.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to verify for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Schedule the powercycle of the node.

    @return: the payload of the powercycle RPC call
    @raise errors.OpExecError: presumably raised by the C{Raise} call
        when the RPC failed (to be confirmed against the RPC result
        class)

    """
    hv_name = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3463
3464
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version information, the platform architecture and selected
        cluster configuration values; hypervisor parameters are reported
        only for the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, filtered for the enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hvs)

    # Cluster-level hypervisor parameters, same filtering
    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3522
3523
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Request no locks and validate the selected output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the value of each requested field.

    @rtype: list
    @return: the field values, in the order of C{self.op.output_fields}
    @raise errors.ParameterError: for a field this LU cannot compute

    """
    # One zero-argument getter per supported field; each value is only
    # computed when its field is actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      }

    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())
    return values
3565
3566
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and calls
    _CheckNodeOnline on its primary node; a missing C{ignore_size}
    opcode attribute is defaulted to False.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information list computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (all_ok, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if all_ok:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
3607
3608
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first
  every device is assembled in secondary mode on every node, then the
  devices living on the primary node are assembled in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't result in an error return from the
      function; errors during the second pass are never ignored
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if any
      non-ignored assemble call failed and True otherwise, while
      device_info is a list with one
      (primary_node, instance_visible_name, node_visible_name) entry
      per instance disk, where node_visible_name is the payload of the
      primary-mode assemble call, or None if that call failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, so that the configured disk keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    # stays None if the primary-mode assembly fails (or never happens)
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
3691
3692
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled via L{_AssembleInstanceDisks}, with C{force}
  passed as its C{ignore_secondaries} argument. If that fails, the disks
  are shut down again and an error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we start
  @param force: whether to ignore errors on secondary nodes; when this
      is false (but not None) the user is hinted at '--force'
  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled = _AssembleInstanceDisks(lu, instance,
                                     ignore_secondaries=force)[0]
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3706
3707
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    The work is delegated to L{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3740
3741
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function first calls _CheckInstanceDown (presumably raising an
  error if the instance is still running; confirm against that helper)
  and only afterwards calls _ShutdownInstanceDisks. The result of
  _ShutdownInstanceDisks is discarded, so nothing is returned.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)
3751
3752
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning and the remaining devices are still processed.

  If ignore_primary is true, errors on the primary node are ignored
  (they are still logged, but do not affect the return value); errors
  on any other node always cause a False result.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      be ignored when computing the result
  @rtype: boolean
  @return: False if any non-ignored shutdown call failed, True otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # only a primary node failure can be ignored, and only on request
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
3774
3775
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its C{memory_free} value; an OpPrereqError is
  raised if the query fails, if the value is not an integer or if it is
  lower than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
3811
3812
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  All nodes are queried with a single RPC call and then checked in
  order; the first node that cannot be queried, reports a non-integer
  C{vg_free} value or has too little free space causes an
  OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  all_info = lu.rpc.call_node_info(nodenames, vg_name, hv_name)

  for name in nodenames:
    node_result = all_info[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (name, vg_free),
                                 errors.ECODE_ENVIRON)

    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (name, requested, vg_free),
                                 errors.ECODE_NORES)
3847
3848
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be started.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The same node list (the master node plus all nodes of the instance)
    is returned for both node-list slots of the result.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional one-off beparams/hvparams overrides, and runs the node
    online, bridge and (if the instance is not already running) free
    memory checks.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    beparams = self.beparams = getattr(self.op, "beparams", {})
    if beparams:
      if not isinstance(beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = beparams

    # extra hvparams
    hvparams = self.hvparams = getattr(self.op, "hvparams", {})
    if hvparams:
      if not isinstance(hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      # cluster defaults, overridden by the instance's own parameters,
      # overridden in turn by the values passed with this opcode
      merged_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      merged_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, merged_hvp)
      self.op.hvparams = hvparams

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    running_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
    running_info.Raise("Error checking node %s" % instance.primary_node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
    if running_info.payload:
      # already running, its memory is already accounted for
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration before its disks
    and the instance itself are started; if the start fails, the disks
    are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance

    self.cfg.MarkInstanceUp(instance.name)

    primary = instance.primary_node

    _StartInstanceDisks(self, instance, self.op.force)

    start_result = self.rpc.call_instance_start(primary, instance,
                                                self.hvparams, self.beparams)
    start_msg = start_result.fail_msg
    if not start_msg:
      return

    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % start_msg)
3949
3950
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing C{shutdown_timeout} opcode attribute falls back to the
    default shutdown timeout.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full types

    """
    known_types = [constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL]
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(known_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The same node list (the master node plus all nodes of the instance)
    is returned for both node-list slots of the result.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, then runs the node
    online and bridge existence checks.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are done through a single reboot RPC call; a
    full reboot shuts the instance down, restarts its disks and starts
    the instance again. In both cases the instance is marked as up at
    the end.

    @raise errors.OpExecError: if starting the instance during a full
        reboot fails

    """
    instance = self.instance
    kind = self.op.reboot_type
    skip_secondaries = self.op.ignore_secondaries
    primary = instance.primary_node

    in_place = kind in (constants.INSTANCE_REBOOT_SOFT,
                        constants.INSTANCE_REBOOT_HARD)
    if in_place:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, primary)
      reboot_result = self.rpc.call_instance_reboot(primary, instance, kind,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      stop_result = self.rpc.call_instance_shutdown(primary, instance,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, skip_secondaries)
      start_result = self.rpc.call_instance_start(primary, instance,
                                                  None, None)
      start_msg = start_result.fail_msg
      if start_msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_msg)

    self.cfg.MarkInstanceUp(instance.name)
4039
4040
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing C{timeout} opcode attribute falls back to the default
    shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance to be shut down.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The same node list (the master node plus all nodes of the instance)
    is returned for both node-list slots of the result.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and calls
    _CheckNodeOnline on its primary node.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first; a failed
    shutdown RPC is only logged as a warning, and the disks are shut
    down in any case.

    """
    instance = self.instance
    self.cfg.MarkInstanceDown(instance.name)

    stop_result = self.rpc.call_instance_shutdown(instance.primary_node,
                                                  instance, self.timeout)
    stop_msg = stop_result.fail_msg
    if stop_msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % stop_msg)

    _ShutdownInstanceDisks(self, instance)
4096
4097
class LUReinstallInstance(LogicalUnit):
  """Logical unit that re-runs the OS creation scripts for an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The same node list (master node followed by all nodes of the instance)
    is returned for both node-list slots.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist, must not be diskless and must pass
    _CheckInstanceDown. If the opcode carries a new OS type, that OS is
    looked up on the primary node and, unless force_variant is set, its
    variant is validated too.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    diskless = (inst.disk_template == constants.DT_DISKLESS)
    if diskless:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # Both attributes are optional on the opcode; normalise them so that
    # later code can rely on their presence.
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)

    if self.op.os_type is not None:
      # A new OS was requested: verify it against the primary node
      node_name = _ExpandNodeName(self.cfg, inst.primary_node)
      os_result = self.rpc.call_os_get(node_name, self.op.os_type)
      err_text = ("OS '%s' not in supported OS list for primary node %s" %
                  (self.op.os_type, node_name))
      os_result.Raise(err_text, prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(os_result.payload, self.op.os_type)

    # Only published once every check above has passed
    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    An OS change, if requested, is written to the configuration first.
    The disks are then started, the OS create scripts are run on the
    primary node and the disks are shut down again, even if the scripts
    fail.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # NOTE(review): the third positional argument (True) presumably marks
      # this as a reinstall; confirm against the RPC definition
      os_add_res = self.rpc.call_instance_os_add(instance.primary_node,
                                                 instance, True,
                                                 self.op.debug_level)
      os_add_res.Raise("Could not install OS for instance %s on node %s" %
                       (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
4172
4173
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit that recreates some or all disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the "disks" opcode parameter.

    It must be a list whose items are all non-negative integers.

    """
    requested = self.op.disks
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for entry in requested:
      if isinstance(entry, int) and entry >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(entry), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The same node list (master node followed by all nodes of the instance)
    is returned for both node-list slots.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist, must not be diskless and must pass
    _CheckInstanceDown. An empty disk list selects every disk; otherwise
    each requested index must refer to an existing disk.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    diskless = (inst.disk_template == constants.DT_DISKLESS)
    if diskless:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_idx in self.op.disks:
        if disk_idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" %
                                     disk_idx, errors.ECODE_INVAL)
    else:
      # nothing was selected explicitly, so select all disks
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that was not requested is passed to _CreateDisks in
    its to_skip list.

    """
    not_requested = [pos for pos, _ in enumerate(self.instance.disks)
                     if pos not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=not_requested)
4245
4246
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  The instance is renamed in the cluster configuration, its instance-level
  lock is replaced by one under the new name, the file storage directory is
  renamed for file-based instances and the OS rename script is run on the
  primary node.

  Unlike the neighbouring instance LUs, this class does not set
  REQ_BGL = False; the lock manipulation in Exec relies on the BGL being
  held.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance. The
    environment additionally carries the requested name as
    INSTANCE_NEW_NAME.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the new name resolves and is not already used by another instance
    and, unless the opcode sets ignore_ip, that the new name's IP address
    does not answer on the node daemon port.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    # from here on the opcode carries the name as returned by the lookup
    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    # ignore_ip is optional on the opcode and defaults to False
    if not getattr(self.op, "ignore_ip", False):
      # a successful ping is treated as the address being in use
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration is updated first; a failure to rename the file
    storage directory raises an error, while a failure of the OS rename
    script is only logged as a warning. In both cases the instance stays
    renamed in the configuration.

    """
    inst = self.instance
    old_name = inst.name

    # remember the directory derived from the first disk before the rename
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      # the re-read object yields the directory for the new name
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    # the disks are started only for the duration of the OS rename script
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)
4336
4337
class LURemoveInstance(LogicalUnit):
  """Logical unit that deletes an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional shutdown timeout.

    The opcode's "shutdown_timeout" attribute is used when present;
    otherwise constants.DEFAULT_SHUTDOWN_TIMEOUT is used.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Declare the instance lock and request node lock recalculation.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The first node list holds only the master node; the second one holds
    all nodes of the instance followed by the master node.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(master_only)
    return hook_env, master_only, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and asserts that it
    exists.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and it is finally
    dropped from the configuration. With ignore_failures set, a failed
    shutdown or disk removal is reported through feedback_fn and the
    removal goes on; otherwise OpExecError is raised.

    """
    inst = self.instance
    tolerate = self.op.ignore_failures

    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)
    shutdown_res = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                   self.shutdown_timeout)
    failure = shutdown_res.fail_msg
    if failure:
      if not tolerate:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)
    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not tolerate:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)
    self.cfg.RemoveInstance(inst.name)
    # register the instance lock for removal as well
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4416
4417
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  Requested output fields are either static (answered from the
  configuration) or dynamic (requiring live data from the nodes); the two
  sets are declared in _FIELDS_STATIC and _FIELDS_DYNAMIC.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields read directly as attributes of the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # plain names plus regular expressions for the indexed disk/nic fields
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    """Validate the requested fields and declare the needed locks.

    Locks are only requested when at least one selected field is not
    matched by the static field set and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # result of NonMatching is used as a truth value: live node data is
    # needed as soon as one selected field is not a static one
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks, but only when locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    Returns a list with one entry per instance; each entry is a list with
    the values of the requested output fields, in the order in which the
    fields were requested.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    # only primary nodes are queried for live data
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # bad_nodes: nodes whose RPC failed; off_nodes: nodes flagged offline
    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      # parameter dictionaries computed once per instance via the cluster
      # object
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        # evaluated up front; only used by the regex-based branch below
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          # None means the state is unknown because the node query failed
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # offline is tested first, as offline nodes are in both lists
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            # combine the live state with the configured (admin) state
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        # the following unindexed NIC fields all refer to the first NIC
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # the link is only reported as a bridge for bridged NICs
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          # third character of the field name gives the index: a -> 0, b -> 1
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          # groups are (kind, parameter[, index]) as captured by the
          # regular expressions in _FIELDS_STATIC
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              # the index is passed on as the matched string
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              # one entry per NIC; None for NICs that are not bridged
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                # an index past the last NIC yields None, not an error
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
4699
4700
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  The instance is shut down on its primary node, its first secondary node
  becomes the new primary in the configuration and, if the instance is
  marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional on the opcode and defaults to
    constants.DEFAULT_SHUTDOWN_TIMEOUT.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Declare the instance lock and request node lock recalculation.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds the master and the secondary nodes of the
    instance; the second one (nl_post) additionally holds the current
    primary node.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    # the failover swaps the roles of the primary and the first secondary
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its disk template
    is network mirrored, that the target (first secondary) node is online
    and not drained, that the target has enough free memory if the instance
    is marked as up, and that the instance's bridges exist on the target.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      # treated as an internal inconsistency rather than a user error
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    With ignore_consistency set, degraded disks on the target node and a
    failed shutdown on the source node do not abort the operation.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        # only warn; the operator has to make sure the source is down
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # from here on the former secondary is recorded as the primary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        # undo the partial activation before reporting the failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # the instance did not start, so its disks are shut down again
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4853
4854
class LUMigrateInstance(LogicalUnit):
  """Logical unit that migrates an instance.

  In contrast to a failover, which shuts the instance down, a migration
  is performed without shutting it down. The actual work is delegated to a
  TLMigrateInstance tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance object is taken from the tasklet. The first node list
    holds the master and the secondary nodes of the instance; the second
    one additionally holds the current primary node.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    old_secondary = inst.secondary_nodes[0]

    hook_env = _BuildInstanceHookEnvByObject(self, inst)
    hook_env["MIGRATE_LIVE"] = self.op.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    # the migration swaps the roles of the primary and the first secondary
    hook_env["OLD_PRIMARY"] = old_primary
    hook_env["OLD_SECONDARY"] = old_secondary
    hook_env["NEW_PRIMARY"] = old_secondary
    hook_env["NEW_SECONDARY"] = old_primary

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return hook_env, pre_nodes, post_nodes
4904
4905
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node and filled with a copy of the data, the
  configuration is switched to the target node and the disks on the
  original node are removed. Only instances whose disks are all of type
  LD_LV or LD_FILE can be moved.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Neither shutdown_timeout nor ignore_consistency is listed in
    _OP_REQP, so both are read with a default here instead of being
    accessed directly on the opcode later on.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node.

    @return: tuple of (environment dict, node list, node list); the same
        node list is used twice

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node, that all disks have a
    simple (LD_LV or LD_FILE) layout, that the target node is online and
    not drained, that it has enough free memory (only if the instance is
    marked up) and that the instance's bridges exist on it.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or has a disk that cannot be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function passed on to the configuration update
    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the target disks cannot be created, if the disk
        copy fails, or if the instance cannot be started afterwards

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the value read with a default in CheckArguments: the opcode
      # is not required to carry an ignore_consistency attribute
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; the first failure stops
    # the loop, so errs holds at most one message
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the copy succeeded: from now on the target node is the primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5086
5087
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance
  returned by _GetNodePrimaryInstances for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # one migration tasklet (never in cleanup mode) per primary instance
    instance_names = []
    migraters = []

    for instance in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      current_name = instance.name
      logging.debug("Migrating instance %s", current_name)
      instance_names.append(current_name)
      migraters.append(TLMigrateInstance(self, current_name,
                                         self.op.live, False))

    self.tasklets = migraters

    # the instances themselves must be locked as well
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains only the master node.

    @return: tuple of (environment dict, node list, node list); the same
        node list is used twice

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      }

    master_only = [self.cfg.GetMasterNode()]

    return (hook_env, master_only, master_only)
5138
5139
class TLMigrateInstance(Tasklet):
  """Tasklet migrating a single drbd8 instance to its secondary node.

  Depending on the C{cleanup} flag, L{Exec} either performs the
  migration (L{_ExecMigration}) or tries to recover from a previously
  failed one (L{_ExecCleanup}).

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf we execute
    @param instance_name: name of the instance to migrate
    @param live: value passed on to the instance_migrate RPC
    @param cleanup: if true, run the cleanup procedure instead of a
        migration

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it uses the
    drbd8 disk template and has a secondary node, that the secondary
    node has enough free memory and the needed bridges, and (unless
    running in cleanup mode) that the secondary node is not drained and
    that the instance is migratable.

    On success the instance object is stored in C{self.instance}.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # the migration target is always the first secondary node
    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    # these two checks are skipped when only cleaning up
    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call. The drbd_wait_sync RPC is
    repeated every two seconds until all nodes report being done; while
    waiting, the lowest reported percentage is sent to the feedback
    function.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        # payload is a (done, percent) pair; percent may be None
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The drbd_disconnect_net RPC is sent to both nodes; a failure on any
    of them is raised.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: whether the disks are reattached in dual-master
        (true) or single-master (false) mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    @raise errors.OpExecError: if the instance is found running on both
        nodes or on none of them

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    # the node on which the instance does not run gets demoted
    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The target node is demoted to secondary and the disks are
    reconnected in single-master mode. Failures are only logged as a
    warning, not raised.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    This sends the finalize_migration RPC to the target node with a
    false success flag; a failure of that call is logged but not raised.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    @raise errors.OpExecError: if a disk is degraded on the target node,
        if the migration information cannot be fetched, or if the
        pre-migration, migration or finalization step fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed 10 second pause before the migration; the reason
    # is not evident from this code
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): another fixed 10 second pause, after the migration
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # the old primary becomes the secondary; go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    This stores the feedback function, the source (primary) and target
    (first secondary) nodes and their secondary IPs on the tasklet, and
    then runs either the cleanup or the migration procedure.

    @param feedback_fn: function used to report progress

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    # map of node name to that node's secondary IP
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5469
5470
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device
  itself is created only if creation is forced, either by the caller
  or because the device (or one of its ancestors) reports
  CreateOnSecondary().

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True for a device whose CreateOnSecondary() is
      true, and the new value is inherited by its children
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # children (if any) come first, inheriting the updated flag
  for sub_device in device.children or ():
    _CreateBlockDev(lu, node, instance, sub_device, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5511
5512
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  Children of the device are not handled here, so they must already
  exist. If the device has no physical_id yet, the payload of the
  creation RPC is stored as its physical_id.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)

  owner_name = instance.name
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         owner_name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, owner_name))
  creation.Raise(failure_text)

  # an already known physical_id is never overwritten
  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
5541
5542
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per given extension.

  For every entry of C{exts} a new unique ID is requested from the
  configuration and the extension is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
5554
5555
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  @param lu: the lu on whose behalf we execute; its configuration
      provides the port, the volume group name and the shared secret
  @param primary: first node entry of the drbd logical_id
  @param secondary: second node entry of the drbd logical_id
  @param size: size of both the drbd device and its data volume
  @param names: sequence whose first two items are the names of the
      data and of the metadata volume
  @param iv_name: the iv_name of the drbd device
  @param p_minor: minor stored after the port in the logical_id
  @param s_minor: minor stored after C{p_minor} in the logical_id
  @rtype: L{objects.Disk}
  @return: the drbd8 disk, with the data and metadata volumes as
      children

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  shared_secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  # the metadata volume has a fixed size
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5575
5576
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: one of the DT_DISKLESS, DT_PLAIN, DT_DRBD8 or
      DT_FILE disk templates
  @param instance_name: instance name passed to the drbd minor
      allocation
  @param primary_node: the primary node (used for drbd8 only)
  @param secondary_nodes: must be empty for the plain and file
      templates and contain exactly one node for drbd8
  @param disk_info: list of dicts with a "size" and a "mode" key, one
      per disk
  @param file_storage_dir: directory of the disk files (file template)
  @param file_driver: first element of the logical_id of file disks
  @param base_index: offset added to the position of each disk when
      computing its index
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: on a wrong number of secondary nodes
      or an unknown template name

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                         for i in range(disk_count)])
    return [objects.Disk(dev_type=constants.LD_LV, size=spec["size"],
                         logical_id=(vgname, lv_names[pos]),
                         iv_name="disk/%d" % (pos + base_index),
                         mode=spec["mode"])
            for pos, spec in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one for the primary, one for the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                         for i in range(disk_count)])
    # flat list: data and meta volume names alternate
    lv_names = [prefix + suffix
                for prefix in prefixes
                for suffix in ("_data", "_meta")]

    drbd_disks = []
    for pos, spec in enumerate(disk_info):
      first = 2 * pos
      drbd = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                  spec["size"], lv_names[first:first + 2],
                                  "disk/%d" % (pos + base_index),
                                  minors[first], minors[first + 1])
      drbd.mode = spec["mode"]
      drbd_disks.append(drbd)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    file_disks = []
    for pos, spec in enumerate(disk_info):
      disk_index = pos + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=spec["size"],
                                     iv_name="disk/%d" % disk_index,
                                     logical_id=(file_driver, file_path),
                                     mode=spec["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5641
5642
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance whose name is embedded in the text
  @return: the instance name prefixed with "originstname+"

  """
  prefix = "originstname+"
  return prefix + "%s" % instance.name
5648
5649
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  For file-based instances the storage directory (taken from the first
  disk) is created first on the primary node. Nothing is returned;
  failures are reported through the exceptions raised by the RPC
  result checks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation;
      the disks are then created on that node only

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [target_node]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are forced on the primary node only
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5693
5694
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the storage directory (taken from the first
  disk's logical id) is removed as well, after the devices.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False if any block device or the
      file storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device, and only on the requested node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: remember the failure but keep removing the rest
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the request was actually sent to, which is not
      # the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5742
5743
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: iterable of dicts, each holding a C{"size"} entry
  @return: the sum of the disk sizes for the plain template, the same sum
      plus 128 per disk for drbd8, and None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not one of the
      four known ones

  """
  # Both totals are computed up front, whatever the requested template is
  plain_total = sum([dsk["size"] for dsk in disks])
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum([dsk["size"] + 128 for dsk in disks])

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
5762
5763
5764 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5765   """Hypervisor parameter validation.
5766
5767   This function abstract the hypervisor parameter validation to be
5768   used in both instance create and instance modify.
5769
5770   @type lu: L{LogicalUnit}
5771   @param lu: the logical unit for which we check
5772   @type nodenames: list
5773   @param nodenames: the list of nodes on which we should check
5774   @type hvname: string
5775   @param hvname: the name of the hypervisor we should use
5776   @type hvparams: dict
5777   @param hvparams: the parameters which we need to check
5778   @raise errors.OpPrereqError: if the parameters are not valid
5779
5780   """
5781   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5782                                                   hvname,
5783                                                   hvparams)
5784   for node in nodenames:
5785     info = hvinfo[node]
5786     if info.offline:
5787       continue
5788     info.Raise("Hypervisor parameter validation failed on node %s" % node)
5789
5790
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  The instance is either created from scratch or imported from an
  existing export, depending on the opcode's C{mode}
  (C{constants.INSTANCE_CREATE} or C{constants.INSTANCE_IMPORT}).

  """
  # hook path and hook type (LogicalUnit subclasses must redefine both)
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # names of opcode attributes; presumably the parameters every opcode
  # handled by this LU must carry (the check itself is not in this
  # class) -- TODO confirm
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  # this LU does not need to hold the Big Ganeti Lock exclusively
  REQ_BGL = False
5802
5803   def CheckArguments(self):
5804     """Check arguments.
5805
5806     """
5807     # set optional parameters to none if they don't exist
5808     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5809       if not hasattr(self.op, attr):
5810         setattr(self.op, attr, None)
5811
5812     # do not require name_check to ease forward/backward compatibility
5813     # for tools
5814     if not hasattr(self.op, "name_check"):
5815       self.op.name_check = True
5816     # validate/normalize the instance name
5817     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5818     if self.op.ip_check and not self.op.name_check:
5819       # TODO: make the ip check more flexible and not depend on the name check
5820       raise errors.OpPrereqError("Cannot do ip checks without a name check",
5821                                  errors.ECODE_INVAL)
5822     if (self.op.disk_template == constants.DT_FILE and
5823         not constants.ENABLE_FILE_STORAGE):
5824       raise errors.OpPrereqError("File storage disabled at configure time",
5825                                  errors.ECODE_INVAL)
5826     # check disk information: either all adopt, or no adopt
5827     has_adopt = has_no_adopt = False
5828     for disk in self.op.disks:
5829       if "adopt" in disk:
5830         has_adopt = True
5831       else:
5832         has_no_adopt = True
5833     if has_adopt and has_no_adopt:
5834       raise errors.OpPrereqError("Either all disks have are adoped or none is",
5835                                  errors.ECODE_INVAL)
5836     if has_adopt:
5837       if self.op.disk_template != constants.DT_PLAIN:
5838         raise errors.OpPrereqError("Disk adoption is only supported for the"
5839                                    " 'plain' disk template",
5840                                    errors.ECODE_INVAL)
5841       if self.op.iallocator is not None:
5842         raise errors.OpPrereqError("Disk adoption not allowed with an"
5843                                    " iallocator script", errors.ECODE_INVAL)
5844       if self.op.mode == constants.INSTANCE_IMPORT:
5845         raise errors.OpPrereqError("Disk adoption not allowed for"
5846                                    " instance import", errors.ECODE_INVAL)
5847
5848     self.adopt_disks = has_adopt
5849
5850   def ExpandNames(self):
5851     """ExpandNames for CreateInstance.
5852
5853     Figure out the right locks for instance creation.
5854
5855     """
5856     self.needed_locks = {}
5857
5858     # cheap checks, mostly valid constants given
5859
5860     # verify creation mode
5861     if self.op.mode not in (constants.INSTANCE_CREATE,
5862                             constants.INSTANCE_IMPORT):
5863       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5864                                  self.op.mode, errors.ECODE_INVAL)
5865
5866     # disk template and mirror node verification
5867     _CheckDiskTemplate(self.op.disk_template)
5868
5869     if self.op.hypervisor is None:
5870       self.op.hypervisor = self.cfg.GetHypervisorType()
5871
5872     cluster = self.cfg.GetClusterInfo()
5873     enabled_hvs = cluster.enabled_hypervisors
5874     if self.op.hypervisor not in enabled_hvs:
5875       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5876                                  " cluster (%s)" % (self.op.hypervisor,
5877                                   ",".join(enabled_hvs)),
5878                                  errors.ECODE_STATE)
5879
5880     # check hypervisor parameter syntax (locally)
5881     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5882     filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5883                                   self.op.hvparams)
5884     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5885     hv_type.CheckParameterSyntax(filled_hvp)
5886     self.hv_full = filled_hvp
5887     # check that we don't specify global parameters on an instance
5888     _CheckGlobalHvParams(self.op.hvparams)
5889
5890     # fill and remember the beparams dict
5891     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5892     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5893                                     self.op.beparams)
5894
5895     #### instance parameters check
5896
5897     # instance name verification
5898     if self.op.name_check:
5899       hostname1 = utils.GetHostInfo(self.op.instance_name)
5900       self.op.instance_name = instance_name = hostname1.name
5901       # used in CheckPrereq for ip ping check
5902       self.check_ip = hostname1.ip
5903     else:
5904       instance_name = self.op.instance_name
5905       self.check_ip = None
5906
5907     # this is just a preventive check, but someone might still add this
5908     # instance in the meantime, and creation will fail at lock-add time
5909     if instance_name in self.cfg.GetInstanceList():
5910       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5911                                  instance_name, errors.ECODE_EXISTS)
5912
5913     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5914
5915     # NIC buildup
5916     self.nics = []
5917     for idx, nic in enumerate(self.op.nics):
5918       nic_mode_req = nic.get("mode", None)
5919       nic_mode = nic_mode_req
5920       if nic_mode is None:
5921         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5922
5923       # in routed mode, for the first nic, the default ip is 'auto'
5924       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5925         default_ip_mode = constants.VALUE_AUTO
5926       else:
5927         default_ip_mode = constants.VALUE_NONE
5928
5929       # ip validity checks
5930       ip = nic.get("ip", default_ip_mode)
5931       if ip is None or ip.lower() == constants.VALUE_NONE:
5932         nic_ip = None
5933       elif ip.lower() == constants.VALUE_AUTO:
5934         if not self.op.name_check:
5935           raise errors.OpPrereqError("IP address set to auto but name checks"
5936                                      " have been skipped. Aborting.",
5937                                      errors.ECODE_INVAL)
5938         nic_ip = hostname1.ip
5939       else:
5940         if not utils.IsValidIP(ip):
5941           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5942                                      " like a valid IP" % ip,
5943                                      errors.ECODE_INVAL)
5944         nic_ip = ip
5945
5946       # TODO: check the ip address for uniqueness
5947       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5948         raise errors.OpPrereqError("Routed nic mode requires an ip address",
5949                                    errors.ECODE_INVAL)
5950
5951       # MAC address verification
5952       mac = nic.get("mac", constants.VALUE_AUTO)
5953       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5954         mac = utils.NormalizeAndValidateMac(mac)
5955
5956         try:
5957           self.cfg.ReserveMAC(mac, self.proc.GetECId())
5958         except errors.ReservationError:
5959           raise errors.OpPrereqError("MAC address %s already in use"
5960                                      " in cluster" % mac,
5961                                      errors.ECODE_NOTUNIQUE)
5962
5963       # bridge verification
5964       bridge = nic.get("bridge", None)
5965       link = nic.get("link", None)
5966       if bridge and link:
5967         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5968                                    " at the same time", errors.ECODE_INVAL)
5969       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5970         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5971                                    errors.ECODE_INVAL)
5972       elif bridge:
5973         link = bridge
5974
5975       nicparams = {}
5976       if nic_mode_req:
5977         nicparams[constants.NIC_MODE] = nic_mode_req
5978       if link:
5979         nicparams[constants.NIC_LINK] = link
5980
5981       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5982                                       nicparams)
5983       objects.NIC.CheckParameterSyntax(check_params)
5984       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5985
5986     # disk checks/pre-build
5987     self.disks = []
5988     for disk in self.op.disks:
5989       mode = disk.get("mode", constants.DISK_RDWR)
5990       if mode not in constants.DISK_ACCESS_SET:
5991         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5992                                    mode, errors.ECODE_INVAL)
5993       size = disk.get("size", None)
5994       if size is None:
5995         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
5996       try:
5997         size = int(size)
5998       except (TypeError, ValueError):
5999         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6000                                    errors.ECODE_INVAL)
6001       new_disk = {"size": size, "mode": mode}
6002       if "adopt" in disk:
6003         new_disk["adopt"] = disk["adopt"]
6004       self.disks.append(new_disk)
6005
6006     # file storage checks
6007     if (self.op.file_driver and
6008         not self.op.file_driver in constants.FILE_DRIVER):
6009       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6010                                  self.op.file_driver, errors.ECODE_INVAL)
6011
6012     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6013       raise errors.OpPrereqError("File storage directory path not absolute",
6014                                  errors.ECODE_INVAL)
6015
6016     ### Node/iallocator related checks
6017     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6018       raise errors.OpPrereqError("One and only one of iallocator and primary"
6019                                  " node must be given",
6020                                  errors.ECODE_INVAL)
6021
6022     if self.op.iallocator:
6023       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6024     else:
6025       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6026       nodelist = [self.op.pnode]
6027       if self.op.snode is not None:
6028         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6029         nodelist.append(self.op.snode)
6030       self.needed_locks[locking.LEVEL_NODE] = nodelist
6031
6032     # in case of import lock the source node too
6033     if self.op.mode == constants.INSTANCE_IMPORT:
6034       src_node = getattr(self.op, "src_node", None)
6035       src_path = getattr(self.op, "src_path", None)
6036
6037       if src_path is None:
6038         self.op.src_path = src_path = self.op.instance_name
6039
6040       if src_node is None:
6041         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6042         self.op.src_node = None
6043         if os.path.isabs(src_path):
6044           raise errors.OpPrereqError("Importing an instance from an absolute"
6045                                      " path requires a source node option.",
6046                                      errors.ECODE_INVAL)
6047       else:
6048         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6049         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6050           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6051         if not os.path.isabs(src_path):
6052           self.op.src_path = src_path = \
6053             utils.PathJoin(constants.EXPORT_DIR, src_path)
6054
6055       # On import force_variant must be True, because if we forced it at
6056       # initial install, our only chance when importing it back is that it
6057       # works again!
6058       self.op.force_variant = True
6059
6060     else: # INSTANCE_CREATE
6061       if getattr(self.op, "os_type", None) is None:
6062         raise errors.OpPrereqError("No guest OS specified",
6063                                    errors.ECODE_INVAL)
6064       self.op.force_variant = getattr(self.op, "force_variant", False)
6065
6066   def _RunAllocator(self):
6067     """Run the allocator based on input opcode.
6068
6069     """
6070     nics = [n.ToDict() for n in self.nics]
6071     ial = IAllocator(self.cfg, self.rpc,
6072                      mode=constants.IALLOCATOR_MODE_ALLOC,
6073                      name=self.op.instance_name,
6074                      disk_template=self.op.disk_template,
6075                      tags=[],
6076                      os=self.op.os_type,
6077                      vcpus=self.be_full[constants.BE_VCPUS],
6078                      mem_size=self.be_full[constants.BE_MEMORY],
6079                      disks=self.disks,
6080                      nics=nics,
6081                      hypervisor=self.op.hypervisor,
6082                      )
6083
6084     ial.Run(self.op.iallocator)
6085
6086     if not ial.success:
6087       raise errors.OpPrereqError("Can't compute nodes using"
6088                                  " iallocator '%s': %s" %
6089                                  (self.op.iallocator, ial.info),
6090                                  errors.ECODE_NORES)
6091     if len(ial.result) != ial.required_nodes:
6092       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6093                                  " of nodes (%s), required %s" %
6094                                  (self.op.iallocator, len(ial.result),
6095                                   ial.required_nodes), errors.ECODE_FAULT)
6096     self.op.pnode = ial.result[0]
6097     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6098                  self.op.instance_name, self.op.iallocator,
6099                  utils.CommaJoin(ial.result))
6100     if ial.required_nodes == 2:
6101       self.op.snode = ial.result[1]
6102
6103   def BuildHooksEnv(self):
6104     """Build hooks env.
6105
6106     This runs on master, primary and secondary nodes of the instance.
6107
6108     """
6109     env = {
6110       "ADD_MODE": self.op.mode,
6111       }
6112     if self.op.mode == constants.INSTANCE_IMPORT:
6113       env["SRC_NODE"] = self.op.src_node
6114       env["SRC_PATH"] = self.op.src_path
6115       env["SRC_IMAGES"] = self.src_images
6116
6117     env.update(_BuildInstanceHookEnv(
6118       name=self.op.instance_name,
6119       primary_node=self.op.pnode,
6120       secondary_nodes=self.secondaries,
6121       status=self.op.start,
6122       os_type=self.op.os_type,
6123       memory=self.be_full[constants.BE_MEMORY],
6124       vcpus=self.be_full[constants.BE_VCPUS],
6125       nics=_NICListToTuple(self, self.nics),
6126       disk_template=self.op.disk_template,
6127       disks=[(d["size"], d["mode"]) for d in self.disks],
6128       bep=self.be_full,
6129       hvp=self.hv_full,
6130       hypervisor_name=self.op.hypervisor,
6131     ))
6132
6133     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6134           self.secondaries)
6135     return env, nl, nl
6136
6137
6138   def CheckPrereq(self):
6139     """Check prerequisites.
6140
6141     """
6142     if (not self.cfg.GetVGName() and
6143         self.op.disk_template not in constants.DTS_NOT_LVM):
6144       raise errors.OpPrereqError("Cluster does not support lvm-based"
6145                                  " instances", errors.ECODE_STATE)
6146
6147     if self.op.mode == constants.INSTANCE_IMPORT:
6148       src_node = self.op.src_node
6149       src_path = self.op.src_path
6150
6151       if src_node is None:
6152         locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6153         exp_list = self.rpc.call_export_list(locked_nodes)
6154         found = False
6155         for node in exp_list:
6156           if exp_list[node].fail_msg:
6157             continue
6158           if src_path in exp_list[node].payload:
6159             found = True
6160             self.op.src_node = src_node = node
6161             self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6162                                                          src_path)
6163             break
6164         if not found:
6165           raise errors.OpPrereqError("No export found for relative path %s" %
6166                                       src_path, errors.ECODE_INVAL)
6167
6168       _CheckNodeOnline(self, src_node)
6169       result = self.rpc.call_export_info(src_node, src_path)
6170       result.Raise("No export or invalid export found in dir %s" % src_path)
6171
6172       export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6173       if not export_info.has_section(constants.INISECT_EXP):
6174         raise errors.ProgrammerError("Corrupted export config",
6175                                      errors.ECODE_ENVIRON)
6176
6177       ei_version = export_info.get(constants.INISECT_EXP, 'version')
6178       if (int(ei_version) != constants.EXPORT_VERSION):
6179         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6180                                    (ei_version, constants.EXPORT_VERSION),
6181                                    errors.ECODE_ENVIRON)
6182
6183       # Check that the new instance doesn't have less disks than the export
6184       instance_disks = len(self.disks)
6185       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6186       if instance_disks < export_disks:
6187         raise errors.OpPrereqError("Not enough disks to import."
6188                                    " (instance: %d, export: %d)" %
6189                                    (instance_disks, export_disks),
6190                                    errors.ECODE_INVAL)
6191
6192       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6193       disk_images = []
6194       for idx in range(export_disks):
6195         option = 'disk%d_dump' % idx
6196         if export_info.has_option(constants.INISECT_INS, option):
6197           # FIXME: are the old os-es, disk sizes, etc. useful?
6198           export_name = export_info.get(constants.INISECT_INS, option)
6199           image = utils.PathJoin(src_path, export_name)
6200           disk_images.append(image)
6201         else:
6202           disk_images.append(False)
6203
6204       self.src_images = disk_images
6205
6206       old_name = export_info.get(constants.INISECT_INS, 'name')
6207       # FIXME: int() here could throw a ValueError on broken exports
6208       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6209       if self.op.instance_name == old_name:
6210         for idx, nic in enumerate(self.nics):
6211           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6212             nic_mac_ini = 'nic%d_mac' % idx
6213             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6214
6215     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6216
6217     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6218     if self.op.ip_check:
6219       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6220         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6221                                    (self.check_ip, self.op.instance_name),
6222                                    errors.ECODE_NOTUNIQUE)
6223
6224     #### mac address generation
6225     # By generating here the mac address both the allocator and the hooks get
6226     # the real final mac address rather than the 'auto' or 'generate' value.
6227     # There is a race condition between the generation and the instance object
6228     # creation, which means that we know the mac is valid now, but we're not
6229     # sure it will be when we actually add the instance. If things go bad
6230     # adding the instance will abort because of a duplicate mac, and the
6231     # creation job will fail.
6232     for nic in self.nics:
6233       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6234         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6235
6236     #### allocator run
6237
6238     if self.op.iallocator is not None:
6239       self._RunAllocator()
6240
6241     #### node related checks
6242
6243     # check primary node
6244     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6245     assert self.pnode is not None, \
6246       "Cannot retrieve locked node %s" % self.op.pnode
6247     if pnode.offline:
6248       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6249                                  pnode.name, errors.ECODE_STATE)
6250     if pnode.drained:
6251       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6252                                  pnode.name, errors.ECODE_STATE)
6253
6254     self.secondaries = []
6255
6256     # mirror node verification
6257     if self.op.disk_template in constants.DTS_NET_MIRROR:
6258       if self.op.snode is None:
6259         raise errors.OpPrereqError("The networked disk templates need"
6260                                    " a mirror node", errors.ECODE_INVAL)
6261       if self.op.snode == pnode.name:
6262         raise errors.OpPrereqError("The secondary node cannot be the"
6263                                    " primary node.", errors.ECODE_INVAL)
6264       _CheckNodeOnline(self, self.op.snode)
6265       _CheckNodeNotDrained(self, self.op.snode)
6266       self.secondaries.append(self.op.snode)
6267
6268     nodenames = [pnode.name] + self.secondaries
6269
6270     req_size = _ComputeDiskSize(self.op.disk_template,
6271                                 self.disks)
6272
6273     # Check lv size requirements, if not adopting
6274     if req_size is not None and not self.adopt_disks:
6275       _CheckNodesFreeDisk(self, nodenames, req_size)
6276
6277     if self.adopt_disks: # instead, we must check the adoption data
6278       all_lvs = set([i["adopt"] for i in self.disks])
6279       if len(all_lvs) != len(self.disks):
6280         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6281                                    errors.ECODE_INVAL)
6282       for lv_name in all_lvs:
6283         try:
6284           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6285         except errors.ReservationError:
6286           raise errors.OpPrereqError("LV named %s used by another instance" %
6287                                      lv_name, errors.ECODE_NOTUNIQUE)
6288
6289       node_lvs = self.rpc.call_lv_list([pnode.name],
6290                                        self.cfg.GetVGName())[pnode.name]
6291       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6292       node_lvs = node_lvs.payload
6293       delta = all_lvs.difference(node_lvs.keys())
6294       if delta:
6295         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6296                                    utils.CommaJoin(delta),
6297                                    errors.ECODE_INVAL)
6298       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6299       if online_lvs:
6300         raise errors.OpPrereqError("Online logical volumes found, cannot"
6301                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6302                                    errors.ECODE_STATE)
6303       # update the size of disk based on what is found
6304       for dsk in self.disks:
6305         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6306
6307     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6308
6309     # os verification
6310     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6311     result.Raise("OS '%s' not in supported os list for primary node %s" %
6312                  (self.op.os_type, pnode.name),
6313                  prereq=True, ecode=errors.ECODE_INVAL)
6314     if not self.op.force_variant:
6315       _CheckOSVariant(result.payload, self.op.os_type)
6316
6317     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6318
6319     # memory check on primary node
6320     if self.op.start:
6321       _CheckNodeFreeMemory(self, self.pnode.name,
6322                            "creating instance %s" % self.op.instance_name,
6323                            self.be_full[constants.BE_MEMORY],
6324                            self.op.hypervisor)
6325
6326     self.dry_run_result = list(nodenames)
6327
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are, in order: build the disk and instance objects,
    create (or adopt) the disks, add the instance to the configuration,
    release node locks, check the disk sync status, run the OS
    create/import step and finally start the instance if requested.

    @param feedback_fn: callable invoked with progress messages
    @return: list built from C{iobj.all_nodes} of the new instance
    @raise errors.OpExecError: if disk creation fails or the disks are
        found degraded after creation
    @raise errors.ProgrammerError: if the opcode mode is neither create
        nor import

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # a network port is only allocated for hypervisors listed in
    # constants.HTS_REQ_PORT
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance object is created as administratively down; admin_up
    # is only switched on at the end, when self.op.start is set
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        # remember the generated name, then point the copy at the
        # existing volume given under the "adopt" key
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      # NOTE(review): the message below contains a typo ("adoped"); it is
      # runtime text and therefore left untouched here
      result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          # the DRBD minors are released even if the removal itself
          # fails, and the error is propagated in any case
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # on import the source node lock is kept (the node is used again by
      # the OS import call below); all other node locks are released
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      # NOTE(review): the fixed 15s pause presumably gives the mirrors
      # time to connect before the one-shot check -- confirm
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # roll back: drop the disks and the configuration entry again
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # the OS step is skipped for diskless instances and for adopted disks
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                               self.op.debug_level)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        # unlike the create case, a failed import is only logged as a
        # warning and does not abort the operation
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new admin state in the configuration before issuing
      # the start request
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6472
6473
class LUConnectConsole(NoHooksLU):
  """Build the command needed to reach an instance's console.

  Unlike most logical units this one does not change anything; its
  result is the command line that has to be executed on the master
  node in order to get connected to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Call the inherited C{_ExpandAndLockInstance} helper.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be present in the configuration; its primary
    node is then handed to the node-online check.

    """
    wanted = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(wanted)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % wanted
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Compute the console command for the instance.

    @param feedback_fn: unused
    @return: the value of C{self.ssh.BuildCmd} wrapping the
        hypervisor-specific console command for the primary node
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask only the primary node, and only for the instance's hypervisor
    listing = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    on_node = listing[pnode]
    on_node.Raise("Can't get node information from %s" % pnode)

    if inst.name not in on_node.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # the filled parameter sets travel as separate arguments, so that
    # the instance object is not edited and the defaults never end up
    # saved in the instance itself
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    console_cmd = hv.GetShellCommandForConsole(inst, filled_hv, filled_be)

    # wrap the console command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
6525
6526
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The work itself is delegated to a single L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    optional_fields = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for field, default in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace-disks tasklet.

    """
    self._ExpandAndLockInstance()

    op = self.op
    if op.iallocator is not None:
      # the allocator may pick any node
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif op.remote_node is None:
      # only the instance's own nodes, which are filled in later
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Add the instance's nodes to the node locks when needed.

    Nothing is done for other levels, nor when all nodes are already
    being locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list, which is returned twice, holds the master node, the
    instance's primary node and, if one was given, the new secondary.

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": new_secondary,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)
    return env, hook_nodes, hook_nodes
6601
6602
class LUEvacuateNode(LogicalUnit):
  """Move away the instances having a node as their secondary.

  One L{TLReplaceDisks} tasklet, in change-secondary mode, is created
  per affected instance.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    optional_fields = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for field, default in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare node and instance locks and build the tasklets.

    @raise errors.OpPrereqError: if neither an iallocator nor a remote
        node is set

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    self.needed_locks = {}

    # Node locks
    if op.iallocator is None and op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    if op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk replacement tasklet for every instance which has this
    # node as secondary
    evac_names = []
    evac_tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      evac_names.append(inst.name)
      evac_tasklets.append(TLReplaceDisks(self, inst.name,
                                          constants.REPLACE_DISK_CHG,
                                          op.iallocator, op.remote_node, [],
                                          True, op.early_release))

    self.tasklets = evac_tasklets
    self.instance_names = evac_names

    # Instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Add the instances' nodes to the node locks when needed.

    Nothing is done for other levels, nor when all nodes are already
    being locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list, which is returned twice, holds the master node and,
    if one was given, the new secondary node.

    """
    env = {"NODE_NAME": self.op.node_name}
    hook_nodes = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    return (env, hook_nodes, hook_nodes)
6690
6691
6692 class TLReplaceDisks(Tasklet):
6693   """Replaces disks for an instance.
6694
6695   Note: Locking is not within the scope of this class.
6696
6697   """
6698   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6699                disks, delay_iallocator, early_release):
6700     """Initializes this class.
6701
6702     """
6703     Tasklet.__init__(self, lu)
6704
6705     # Parameters
6706     self.instance_name = instance_name
6707     self.mode = mode
6708     self.iallocator_name = iallocator_name
6709     self.remote_node = remote_node
6710     self.disks = disks
6711     self.delay_iallocator = delay_iallocator
6712     self.early_release = early_release
6713
6714     # Runtime data
6715     self.instance = None
6716     self.new_node = None
6717     self.target_node = None
6718     self.other_node = None
6719     self.remote_node_info = None
6720     self.node_secondary_ip = None
6721
6722   @staticmethod
6723   def CheckArguments(mode, remote_node, iallocator):
6724     """Helper function for users of this class.
6725
6726     """
6727     # check for valid parameter combination
6728     if mode == constants.REPLACE_DISK_CHG:
6729       if remote_node is None and iallocator is None:
6730         raise errors.OpPrereqError("When changing the secondary either an"
6731                                    " iallocator script must be used or the"
6732                                    " new node given", errors.ECODE_INVAL)
6733
6734       if remote_node is not None and iallocator is not None:
6735         raise errors.OpPrereqError("Give either the iallocator or the new"
6736                                    " secondary, not both", errors.ECODE_INVAL)
6737
6738     elif remote_node is not None or iallocator is not None:
6739       # Not replacing the secondary
6740       raise errors.OpPrereqError("The iallocator and new node options can"
6741                                  " only be used when changing the"
6742                                  " secondary node", errors.ECODE_INVAL)
6743
6744   @staticmethod
6745   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6746     """Compute a new secondary node using an IAllocator.
6747
6748     """
6749     ial = IAllocator(lu.cfg, lu.rpc,
6750                      mode=constants.IALLOCATOR_MODE_RELOC,
6751                      name=instance_name,
6752                      relocate_from=relocate_from)
6753
6754     ial.Run(iallocator_name)
6755
6756     if not ial.success:
6757       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6758                                  " %s" % (iallocator_name, ial.info),
6759                                  errors.ECODE_NORES)
6760
6761     if len(ial.result) != ial.required_nodes:
6762       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6763                                  " of nodes (%s), required %s" %
6764                                  (iallocator_name,
6765                                   len(ial.result), ial.required_nodes),
6766                                  errors.ECODE_FAULT)
6767
6768     remote_node_name = ial.result[0]
6769
6770     lu.LogInfo("Selected new secondary for instance '%s': %s",
6771                instance_name, remote_node_name)
6772
6773     return remote_node_name
6774
6775   def _FindFaultyDisks(self, node_name):
6776     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6777                                     node_name, True)
6778
6779   def CheckPrereq(self):
6780     """Check prerequisites.
6781
6782     This checks that the instance is in the cluster.
6783
6784     """
6785     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6786     assert instance is not None, \
6787       "Cannot retrieve locked instance %s" % self.instance_name
6788
6789     if instance.disk_template != constants.DT_DRBD8:
6790       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6791                                  " instances", errors.ECODE_INVAL)
6792
6793     if len(instance.secondary_nodes) != 1:
6794       raise errors.OpPrereqError("The instance has a strange layout,"
6795                                  " expected one secondary but found %d" %
6796                                  len(instance.secondary_nodes),
6797                                  errors.ECODE_FAULT)
6798
6799     if not self.delay_iallocator:
6800       self._CheckPrereq2()
6801
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method fills in the runtime
    attributes used during execution: C{remote_node_info}, C{disks},
    C{target_node}, C{other_node}, C{new_node} (only when changing the
    secondary), possibly C{early_release}, and C{node_secondary_ip}.

    @raise errors.OpPrereqError: if the new node is the primary or the
        current secondary, if disks were given in a mode which does not
        accept them, or if automatic repair finds faulty disks on both
        nodes
    @raise errors.ProgrammerError: for an unknown replace mode

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # the new node is either the one passed in or computed by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # an explicit disk list is rejected in automatic and
    # change-secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # automatic mode: replace whatever is reported faulty, as long as
      # only one of the two nodes is affected
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # nothing is faulty; Exec returns early on an empty disk list
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # the old secondary is not part of the online check below, only
        # the new node and the primary are
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    # (the return value is unused; presumably FindDisk raises for an
    # invalid index -- confirm against objects.Instance)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # nodes which play no role in the selected mode are still None and
    # are skipped
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
6916
6917   def Exec(self, feedback_fn):
6918     """Execute disk replacement.
6919
6920     This dispatches the disk replacement to the appropriate handler.
6921
6922     """
6923     if self.delay_iallocator:
6924       self._CheckPrereq2()
6925
6926     if not self.disks:
6927       feedback_fn("No disks need replacement")
6928       return
6929
6930     feedback_fn("Replacing disk(s) %s for %s" %
6931                 (utils.CommaJoin(self.disks), self.instance.name))
6932
6933     activate_disks = (not self.instance.admin_up)
6934
6935     # Activate the instance disks if we're replacing them on a down instance
6936     if activate_disks:
6937       _StartInstanceDisks(self.lu, self.instance, True)
6938
6939     try:
6940       # Should we replace the secondary node?
6941       if self.new_node is not None:
6942         fn = self._ExecDrbd8Secondary
6943       else:
6944         fn = self._ExecDrbd8DiskOnly
6945
6946       return fn(feedback_fn)
6947
6948     finally:
6949       # Deactivate the instance disks if we're replacing them on a
6950       # down instance
6951       if activate_disks:
6952         _SafeShutdownInstanceDisks(self.lu, self.instance)
6953
6954   def _CheckVolumeGroup(self, nodes):
6955     self.lu.LogInfo("Checking volume groups")
6956
6957     vgname = self.cfg.GetVGName()
6958
6959     # Make sure volume group exists on all involved nodes
6960     results = self.rpc.call_vg_list(nodes)
6961     if not results:
6962       raise errors.OpExecError("Can't list volume groups on the nodes")
6963
6964     for node in nodes:
6965       res = results[node]
6966       res.Raise("Error checking node %s" % node)
6967       if vgname not in res.payload:
6968         raise errors.OpExecError("Volume group '%s' not found on node %s" %
6969                                  (vgname, node))
6970
6971   def _CheckDisksExistence(self, nodes):
6972     # Check disk existence
6973     for idx, dev in enumerate(self.instance.disks):
6974       if idx not in self.disks:
6975         continue
6976
6977       for node in nodes:
6978         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6979         self.cfg.SetDiskID(dev, node)
6980
6981         result = self.rpc.call_blockdev_find(node, dev)
6982
6983         msg = result.fail_msg
6984         if msg or not result.payload:
6985           if not msg:
6986             msg = "disk not found"
6987           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6988                                    (idx, node, msg))
6989
6990   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6991     for idx, dev in enumerate(self.instance.disks):
6992       if idx not in self.disks:
6993         continue
6994
6995       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6996                       (idx, node_name))
6997
6998       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6999                                    ldisk=ldisk):
7000         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7001                                  " replace disks for instance %s" %
7002                                  (node_name, self.instance.name))
7003
7004   def _CreateNewStorage(self, node_name):
7005     vgname = self.cfg.GetVGName()
7006     iv_names = {}
7007
7008     for idx, dev in enumerate(self.instance.disks):
7009       if idx not in self.disks:
7010         continue
7011
7012       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7013
7014       self.cfg.SetDiskID(dev, node_name)
7015
7016       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7017       names = _GenerateUniqueNames(self.lu, lv_names)
7018
7019       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7020                              logical_id=(vgname, names[0]))
7021       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7022                              logical_id=(vgname, names[1]))
7023
7024       new_lvs = [lv_data, lv_meta]
7025       old_lvs = dev.children
7026       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7027
7028       # we pass force_create=True to force the LVM creation
7029       for new_lv in new_lvs:
7030         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7031                         _GetInstanceInfoText(self.instance), False)
7032
7033     return iv_names
7034
7035   def _CheckDevices(self, node_name, iv_names):
7036     for name, (dev, _, _) in iv_names.iteritems():
7037       self.cfg.SetDiskID(dev, node_name)
7038
7039       result = self.rpc.call_blockdev_find(node_name, dev)
7040
7041       msg = result.fail_msg
7042       if msg or not result.payload:
7043         if not msg:
7044           msg = "disk not found"
7045         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7046                                  (name, msg))
7047
7048       if result.payload.is_degraded:
7049         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7050
7051   def _RemoveOldStorage(self, node_name, iv_names):
7052     for name, (_, old_lvs, _) in iv_names.iteritems():
7053       self.lu.LogInfo("Remove logical volumes for %s" % name)
7054
7055       for lv in old_lvs:
7056         self.cfg.SetDiskID(lv, node_name)
7057
7058         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7059         if msg:
7060           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7061                              hint="remove unused LVs manually")
7062
7063   def _ReleaseNodeLock(self, node_name):
7064     """Releases the lock for a given node."""
7065     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7066
7067   def _ExecDrbd8DiskOnly(self, feedback_fn):
7068     """Replace a disk on the primary or secondary for DRBD 8.
7069
7070     The algorithm for replace is quite complicated:
7071
7072       1. for each disk to be replaced:
7073
7074         1. create new LVs on the target node with unique names
7075         1. detach old LVs from the drbd device
7076         1. rename old LVs to name_replaced.<time_t>
7077         1. rename new LVs to old LVs
7078         1. attach the new LVs (with the old names now) to the drbd device
7079
7080       1. wait for sync across all devices
7081
7082       1. for each modified disk:
7083
7084         1. remove old LVs (which have the name name_replaces.<time_t>)
7085
7086     Failures are not very well handled.
7087
7088     """
7089     steps_total = 6
7090
7091     # Step: check device activation
7092     self.lu.LogStep(1, steps_total, "Check device existence")
7093     self._CheckDisksExistence([self.other_node, self.target_node])
7094     self._CheckVolumeGroup([self.target_node, self.other_node])
7095
7096     # Step: check other node consistency
7097     self.lu.LogStep(2, steps_total, "Check peer consistency")
7098     self._CheckDisksConsistency(self.other_node,
7099                                 self.other_node == self.instance.primary_node,
7100                                 False)
7101
7102     # Step: create new storage
7103     self.lu.LogStep(3, steps_total, "Allocate new storage")
7104     iv_names = self._CreateNewStorage(self.target_node)
7105
7106     # Step: for each lv, detach+rename*2+attach
7107     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7108     for dev, old_lvs, new_lvs in iv_names.itervalues():
7109       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7110
7111       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7112                                                      old_lvs)
7113       result.Raise("Can't detach drbd from local storage on node"
7114                    " %s for device %s" % (self.target_node, dev.iv_name))
7115       #dev.children = []
7116       #cfg.Update(instance)
7117
7118       # ok, we created the new LVs, so now we know we have the needed
7119       # storage; as such, we proceed on the target node to rename
7120       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7121       # using the assumption that logical_id == physical_id (which in
7122       # turn is the unique_id on that node)
7123
7124       # FIXME(iustin): use a better name for the replaced LVs
7125       temp_suffix = int(time.time())
7126       ren_fn = lambda d, suff: (d.physical_id[0],
7127                                 d.physical_id[1] + "_replaced-%s" % suff)
7128
7129       # Build the rename list based on what LVs exist on the node
7130       rename_old_to_new = []
7131       for to_ren in old_lvs:
7132         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7133         if not result.fail_msg and result.payload:
7134           # device exists
7135           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7136
7137       self.lu.LogInfo("Renaming the old LVs on the target node")
7138       result = self.rpc.call_blockdev_rename(self.target_node,
7139                                              rename_old_to_new)
7140       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7141
7142       # Now we rename the new LVs to the old LVs
7143       self.lu.LogInfo("Renaming the new LVs on the target node")
7144       rename_new_to_old = [(new, old.physical_id)
7145                            for old, new in zip(old_lvs, new_lvs)]
7146       result = self.rpc.call_blockdev_rename(self.target_node,
7147                                              rename_new_to_old)
7148       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7149
7150       for old, new in zip(old_lvs, new_lvs):
7151         new.logical_id = old.logical_id
7152         self.cfg.SetDiskID(new, self.target_node)
7153
7154       for disk in old_lvs:
7155         disk.logical_id = ren_fn(disk, temp_suffix)
7156         self.cfg.SetDiskID(disk, self.target_node)
7157
7158       # Now that the new lvs have the old name, we can add them to the device
7159       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7160       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7161                                                   new_lvs)
7162       msg = result.fail_msg
7163       if msg:
7164         for new_lv in new_lvs:
7165           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7166                                                new_lv).fail_msg
7167           if msg2:
7168             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7169                                hint=("cleanup manually the unused logical"
7170                                      "volumes"))
7171         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7172
7173       dev.children = new_lvs
7174
7175       self.cfg.Update(self.instance, feedback_fn)
7176
7177     cstep = 5
7178     if self.early_release:
7179       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7180       cstep += 1
7181       self._RemoveOldStorage(self.target_node, iv_names)
7182       # WARNING: we release both node locks here, do not do other RPCs
7183       # than WaitForSync to the primary node
7184       self._ReleaseNodeLock([self.target_node, self.other_node])
7185
7186     # Wait for sync
7187     # This can fail as the old devices are degraded and _WaitForSync
7188     # does a combined result over all disks, so we don't check its return value
7189     self.lu.LogStep(cstep, steps_total, "Sync devices")
7190     cstep += 1
7191     _WaitForSync(self.lu, self.instance)
7192
7193     # Check all devices manually
7194     self._CheckDevices(self.instance.primary_node, iv_names)
7195
7196     # Step: remove old storage
7197     if not self.early_release:
7198       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7199       cstep += 1
7200       self._RemoveOldStorage(self.target_node, iv_names)
7201
7202   def _ExecDrbd8Secondary(self, feedback_fn):
7203     """Replace the secondary node for DRBD 8.
7204
7205     The algorithm for replace is quite complicated:
7206       - for all disks of the instance:
7207         - create new LVs on the new node with same names
7208         - shutdown the drbd device on the old secondary
7209         - disconnect the drbd network on the primary
7210         - create the drbd device on the new secondary
7211         - network attach the drbd on the primary, using an artifice:
7212           the drbd code for Attach() will connect to the network if it
7213           finds a device which is connected to the good local disks but
7214           not network enabled
7215       - wait for sync across all devices
7216       - remove all disks from the old secondary
7217
7218     Failures are not very well handled.
7219
7220     """
7221     steps_total = 6
7222
7223     # Step: check device activation
7224     self.lu.LogStep(1, steps_total, "Check device existence")
7225     self._CheckDisksExistence([self.instance.primary_node])
7226     self._CheckVolumeGroup([self.instance.primary_node])
7227
7228     # Step: check other node consistency
7229     self.lu.LogStep(2, steps_total, "Check peer consistency")
7230     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7231
7232     # Step: create new storage
7233     self.lu.LogStep(3, steps_total, "Allocate new storage")
7234     for idx, dev in enumerate(self.instance.disks):
7235       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7236                       (self.new_node, idx))
7237       # we pass force_create=True to force LVM creation
7238       for new_lv in dev.children:
7239         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7240                         _GetInstanceInfoText(self.instance), False)
7241
7242     # Step 4: dbrd minors and drbd setups changes
7243     # after this, we must manually remove the drbd minors on both the
7244     # error and the success paths
7245     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7246     minors = self.cfg.AllocateDRBDMinor([self.new_node
7247                                          for dev in self.instance.disks],
7248                                         self.instance.name)
7249     logging.debug("Allocated minors %r", minors)
7250
7251     iv_names = {}
7252     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7253       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7254                       (self.new_node, idx))
7255       # create new devices on new_node; note that we create two IDs:
7256       # one without port, so the drbd will be activated without
7257       # networking information on the new node at this stage, and one
7258       # with network, for the latter activation in step 4
7259       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7260       if self.instance.primary_node == o_node1:
7261         p_minor = o_minor1
7262       else:
7263         assert self.instance.primary_node == o_node2, "Three-node instance?"
7264         p_minor = o_minor2
7265
7266       new_alone_id = (self.instance.primary_node, self.new_node, None,
7267                       p_minor, new_minor, o_secret)
7268       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7269                     p_minor, new_minor, o_secret)
7270
7271       iv_names[idx] = (dev, dev.children, new_net_id)
7272       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7273                     new_net_id)
7274       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7275                               logical_id=new_alone_id,
7276                               children=dev.children,
7277                               size=dev.size)
7278       try:
7279         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7280                               _GetInstanceInfoText(self.instance), False)
7281       except errors.GenericError:
7282         self.cfg.ReleaseDRBDMinors(self.instance.name)
7283         raise
7284
7285     # We have new devices, shutdown the drbd on the old secondary
7286     for idx, dev in enumerate(self.instance.disks):
7287       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7288       self.cfg.SetDiskID(dev, self.target_node)
7289       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7290       if msg:
7291         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7292                            "node: %s" % (idx, msg),
7293                            hint=("Please cleanup this device manually as"
7294                                  " soon as possible"))
7295
7296     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7297     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7298                                                self.node_secondary_ip,
7299                                                self.instance.disks)\
7300                                               [self.instance.primary_node]
7301
7302     msg = result.fail_msg
7303     if msg:
7304       # detaches didn't succeed (unlikely)
7305       self.cfg.ReleaseDRBDMinors(self.instance.name)
7306       raise errors.OpExecError("Can't detach the disks from the network on"
7307                                " old node: %s" % (msg,))
7308
7309     # if we managed to detach at least one, we update all the disks of
7310     # the instance to point to the new secondary
7311     self.lu.LogInfo("Updating instance configuration")
7312     for dev, _, new_logical_id in iv_names.itervalues():
7313       dev.logical_id = new_logical_id
7314       self.cfg.SetDiskID(dev, self.instance.primary_node)
7315
7316     self.cfg.Update(self.instance, feedback_fn)
7317
7318     # and now perform the drbd attach
7319     self.lu.LogInfo("Attaching primary drbds to new secondary"
7320                     " (standalone => connected)")
7321     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7322                                             self.new_node],
7323                                            self.node_secondary_ip,
7324                                            self.instance.disks,
7325                                            self.instance.name,
7326                                            False)
7327     for to_node, to_result in result.items():
7328       msg = to_result.fail_msg
7329       if msg:
7330         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7331                            to_node, msg,
7332                            hint=("please do a gnt-instance info to see the"
7333                                  " status of disks"))
7334     cstep = 5
7335     if self.early_release:
7336       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7337       cstep += 1
7338       self._RemoveOldStorage(self.target_node, iv_names)
7339       # WARNING: we release all node locks here, do not do other RPCs
7340       # than WaitForSync to the primary node
7341       self._ReleaseNodeLock([self.instance.primary_node,
7342                              self.target_node,
7343                              self.new_node])
7344
7345     # Wait for sync
7346     # This can fail as the old devices are degraded and _WaitForSync
7347     # does a combined result over all disks, so we don't check its return value
7348     self.lu.LogStep(cstep, steps_total, "Sync devices")
7349     cstep += 1
7350     _WaitForSync(self.lu, self.instance)
7351
7352     # Check all devices manually
7353     self._CheckDevices(self.instance.primary_node, iv_names)
7354
7355     # Step: remove old storage
7356     if not self.early_release:
7357       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7358       self._RemoveOldStorage(self.target_node, iv_names)
7359
7360
7361 class LURepairNodeStorage(NoHooksLU):
7362   """Repairs the volume group on a node.
7363
7364   """
7365   _OP_REQP = ["node_name"]
7366   REQ_BGL = False
7367
7368   def CheckArguments(self):
7369     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7370
7371   def ExpandNames(self):
7372     self.needed_locks = {
7373       locking.LEVEL_NODE: [self.op.node_name],
7374       }
7375
7376   def _CheckFaultyDisks(self, instance, node_name):
7377     """Ensure faulty disks abort the opcode or at least warn."""
7378     try:
7379       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7380                                   node_name, True):
7381         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7382                                    " node '%s'" % (instance.name, node_name),
7383                                    errors.ECODE_STATE)
7384     except errors.OpPrereqError, err:
7385       if self.op.ignore_consistency:
7386         self.proc.LogWarning(str(err.args[0]))
7387       else:
7388         raise
7389
7390   def CheckPrereq(self):
7391     """Check prerequisites.
7392
7393     """
7394     storage_type = self.op.storage_type
7395
7396     if (constants.SO_FIX_CONSISTENCY not in
7397         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7398       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7399                                  " repaired" % storage_type,
7400                                  errors.ECODE_INVAL)
7401
7402     # Check whether any instance on this node has faulty disks
7403     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7404       if not inst.admin_up:
7405         continue
7406       check_nodes = set(inst.all_nodes)
7407       check_nodes.discard(self.op.node_name)
7408       for inst_node_name in check_nodes:
7409         self._CheckFaultyDisks(inst, inst_node_name)
7410
7411   def Exec(self, feedback_fn):
7412     feedback_fn("Repairing storage unit '%s' on %s ..." %
7413                 (self.op.name, self.op.node_name))
7414
7415     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7416     result = self.rpc.call_storage_execute(self.op.node_name,
7417                                            self.op.storage_type, st_args,
7418                                            self.op.name,
7419                                            constants.SO_FIX_CONSISTENCY)
7420     result.Raise("Failed to repair storage unit '%s' on %s" %
7421                  (self.op.name, self.op.node_name))
7422
7423
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional parameters and reject conflicting ones.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    if not (self.op.remote_node is None or self.op.iallocator is None):
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and compute the node locks.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}

    if self.op.remote_node is not None:
      # explicit new secondary: lock it together with the evacuated nodes
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the list of (instance, new secondary) relocations.

    """
    if self.op.remote_node is None:
      # no explicit target given, delegate the decision to the iallocator
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    secondaries = []
    for node_name in self.op.nodes:
      secondaries.extend(_GetNodeSecondaryInstances(self.cfg, node_name))

    moves = []
    for inst in secondaries:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      moves.append([inst.name, self.op.remote_node])
    return moves
7476
7477
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    hook_env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env_tuple(hook_env, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and that the nodes
    have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    growable = (constants.DT_PLAIN, constants.DT_DRBD8)
    if inst.disk_template not in growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    _CheckNodesFreeDisk(self, inst_nodes, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    for node_name in self.instance.all_nodes:
      self.cfg.SetDiskID(self.disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, self.disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    self.disk.RecordGrow(self.op.amount)
    self.cfg.Update(self.instance, feedback_fn)

    if self.op.wait_for_sync and not _WaitForSync(self, self.instance):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")


def env_tuple(hook_env, hook_nodes):
  """Return the (env, pre-nodes, post-nodes) hooks triple.

  The same node list object is used for both node entries.

  """
  return hook_env, hook_nodes, hook_nodes
7559
7560
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the (shared) instance locks and prepare the node locks.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not self.op.instances:
      # nothing requested explicitly means all instances
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, short_name)
                           for short_name in self.op.instances]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(inst_name)
                             for inst_name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    The result is None for static queries, when no node is given, when
    the node is offline or when the node returns no data for the device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    lookup = self.rpc.call_blockdev_find(node, dev)
    if lookup.offline:
      return None

    lookup.Raise("Can't compute disk status for %s" % instance_name)

    found = lookup.payload
    if found is not None:
      return (found.dev_path, found.major, found.minor,
              found.sync_percent, found.estimated_time,
              found.is_degraded, found.ldisk_status)
    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The device is described recursively, including all its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    child_data = [self._ComputeDiskStatus(instance, snode, child)
                  for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_data,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for instance in self.wanted_instances:
      if self.op.static:
        remote_state = None
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        live_data = remote_info.payload
        if live_data and "state" in live_data:
          remote_state = "up"
        else:
          remote_state = "down"

      if not instance.admin_up:
        config_state = "down"
      else:
        config_state = "up"

      disk_data = []
      for device in instance.disks:
        disk_data.append(self._ComputeDiskStatus(instance, None, device))

      all_data[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disk_data,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return all_data
7716
7717
7718 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
7720
7721   """
7722   HPATH = "instance-modify"
7723   HTYPE = constants.HTYPE_INSTANCE
7724   _OP_REQP = ["instance_name"]
7725   REQ_BGL = False
7726
7727   def CheckArguments(self):
7728     if not hasattr(self.op, 'nics'):
7729       self.op.nics = []
7730     if not hasattr(self.op, 'disks'):
7731       self.op.disks = []
7732     if not hasattr(self.op, 'beparams'):
7733       self.op.beparams = {}
7734     if not hasattr(self.op, 'hvparams'):
7735       self.op.hvparams = {}
7736     if not hasattr(self.op, "disk_template"):
7737       self.op.disk_template = None
7738     if not hasattr(self.op, "remote_node"):
7739       self.op.remote_node = None
7740     self.op.force = getattr(self.op, "force", False)
7741     if not (self.op.nics or self.op.disks or self.op.disk_template or
7742             self.op.hvparams or self.op.beparams):
7743       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7744
7745     if self.op.hvparams:
7746       _CheckGlobalHvParams(self.op.hvparams)
7747
7748     # Disk validation
7749     disk_addremove = 0
7750     for disk_op, disk_dict in self.op.disks:
7751       if disk_op == constants.DDM_REMOVE:
7752         disk_addremove += 1
7753         continue
7754       elif disk_op == constants.DDM_ADD:
7755         disk_addremove += 1
7756       else:
7757         if not isinstance(disk_op, int):
7758           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7759         if not isinstance(disk_dict, dict):
7760           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7761           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7762
7763       if disk_op == constants.DDM_ADD:
7764         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7765         if mode not in constants.DISK_ACCESS_SET:
7766           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7767                                      errors.ECODE_INVAL)
7768         size = disk_dict.get('size', None)
7769         if size is None:
7770           raise errors.OpPrereqError("Required disk parameter size missing",
7771                                      errors.ECODE_INVAL)
7772         try:
7773           size = int(size)
7774         except (TypeError, ValueError), err:
7775           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7776                                      str(err), errors.ECODE_INVAL)
7777         disk_dict['size'] = size
7778       else:
7779         # modification of disk
7780         if 'size' in disk_dict:
7781           raise errors.OpPrereqError("Disk size change not possible, use"
7782                                      " grow-disk", errors.ECODE_INVAL)
7783
7784     if disk_addremove > 1:
7785       raise errors.OpPrereqError("Only one disk add or remove operation"
7786                                  " supported at a time", errors.ECODE_INVAL)
7787
7788     if self.op.disks and self.op.disk_template is not None:
7789       raise errors.OpPrereqError("Disk template conversion and other disk"
7790                                  " changes not supported at the same time",
7791                                  errors.ECODE_INVAL)
7792
7793     if self.op.disk_template:
7794       _CheckDiskTemplate(self.op.disk_template)
7795       if (self.op.disk_template in constants.DTS_NET_MIRROR and
7796           self.op.remote_node is None):
7797         raise errors.OpPrereqError("Changing the disk template to a mirrored"
7798                                    " one requires specifying a secondary node",
7799                                    errors.ECODE_INVAL)
7800
7801     # NIC validation
7802     nic_addremove = 0
7803     for nic_op, nic_dict in self.op.nics:
7804       if nic_op == constants.DDM_REMOVE:
7805         nic_addremove += 1
7806         continue
7807       elif nic_op == constants.DDM_ADD:
7808         nic_addremove += 1
7809       else:
7810         if not isinstance(nic_op, int):
7811           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7812         if not isinstance(nic_dict, dict):
7813           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7814           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7815
7816       # nic_dict should be a dict
7817       nic_ip = nic_dict.get('ip', None)
7818       if nic_ip is not None:
7819         if nic_ip.lower() == constants.VALUE_NONE:
7820           nic_dict['ip'] = None
7821         else:
7822           if not utils.IsValidIP(nic_ip):
7823             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7824                                        errors.ECODE_INVAL)
7825
7826       nic_bridge = nic_dict.get('bridge', None)
7827       nic_link = nic_dict.get('link', None)
7828       if nic_bridge and nic_link:
7829         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7830                                    " at the same time", errors.ECODE_INVAL)
7831       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7832         nic_dict['bridge'] = None
7833       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7834         nic_dict['link'] = None
7835
7836       if nic_op == constants.DDM_ADD:
7837         nic_mac = nic_dict.get('mac', None)
7838         if nic_mac is None:
7839           nic_dict['mac'] = constants.VALUE_AUTO
7840
7841       if 'mac' in nic_dict:
7842         nic_mac = nic_dict['mac']
7843         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7844           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7845
7846         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7847           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7848                                      " modifying an existing nic",
7849                                      errors.ECODE_INVAL)
7850
7851     if nic_addremove > 1:
7852       raise errors.OpPrereqError("Only one NIC add or remove operation"
7853                                  " supported at a time", errors.ECODE_INVAL)
7854
7855   def ExpandNames(self):
7856     self._ExpandAndLockInstance()
7857     self.needed_locks[locking.LEVEL_NODE] = []
7858     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7859
7860   def DeclareLocks(self, level):
7861     if level == locking.LEVEL_NODE:
7862       self._LockInstancesNodes()
7863       if self.op.disk_template and self.op.remote_node:
7864         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7865         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
7866
7867   def BuildHooksEnv(self):
7868     """Build hooks env.
7869
7870     This runs on the master, primary and secondaries.
7871
7872     """
7873     args = dict()
7874     if constants.BE_MEMORY in self.be_new:
7875       args['memory'] = self.be_new[constants.BE_MEMORY]
7876     if constants.BE_VCPUS in self.be_new:
7877       args['vcpus'] = self.be_new[constants.BE_VCPUS]
7878     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7879     # information at all.
7880     if self.op.nics:
7881       args['nics'] = []
7882       nic_override = dict(self.op.nics)
7883       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7884       for idx, nic in enumerate(self.instance.nics):
7885         if idx in nic_override:
7886           this_nic_override = nic_override[idx]
7887         else:
7888           this_nic_override = {}
7889         if 'ip' in this_nic_override:
7890           ip = this_nic_override['ip']
7891         else:
7892           ip = nic.ip
7893         if 'mac' in this_nic_override:
7894           mac = this_nic_override['mac']
7895         else:
7896           mac = nic.mac
7897         if idx in self.nic_pnew:
7898           nicparams = self.nic_pnew[idx]
7899         else:
7900           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7901         mode = nicparams[constants.NIC_MODE]
7902         link = nicparams[constants.NIC_LINK]
7903         args['nics'].append((ip, mac, mode, link))
7904       if constants.DDM_ADD in nic_override:
7905         ip = nic_override[constants.DDM_ADD].get('ip', None)
7906         mac = nic_override[constants.DDM_ADD]['mac']
7907         nicparams = self.nic_pnew[constants.DDM_ADD]
7908         mode = nicparams[constants.NIC_MODE]
7909         link = nicparams[constants.NIC_LINK]
7910         args['nics'].append((ip, mac, mode, link))
7911       elif constants.DDM_REMOVE in nic_override:
7912         del args['nics'][-1]
7913
7914     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7915     if self.op.disk_template:
7916       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
7917     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7918     return env, nl, nl
7919
7920   @staticmethod
7921   def _GetUpdatedParams(old_params, update_dict,
7922                         default_values, parameter_types):
7923     """Return the new params dict for the given params.
7924
7925     @type old_params: dict
7926     @param old_params: old parameters
7927     @type update_dict: dict
7928     @param update_dict: dict containing new parameter values,
7929                         or constants.VALUE_DEFAULT to reset the
7930                         parameter to its default value
7931     @type default_values: dict
7932     @param default_values: default values for the filled parameters
7933     @type parameter_types: dict
7934     @param parameter_types: dict mapping target dict keys to types
7935                             in constants.ENFORCEABLE_TYPES
7936     @rtype: (dict, dict)
7937     @return: (new_parameters, filled_parameters)
7938
7939     """
7940     params_copy = copy.deepcopy(old_params)
7941     for key, val in update_dict.iteritems():
7942       if val == constants.VALUE_DEFAULT:
7943         try:
7944           del params_copy[key]
7945         except KeyError:
7946           pass
7947       else:
7948         params_copy[key] = val
7949     utils.ForceDictType(params_copy, parameter_types)
7950     params_filled = objects.FillDict(default_values, params_copy)
7951     return (params_copy, params_filled)
7952
7953   def CheckPrereq(self):
7954     """Check prerequisites.
7955
7956     This only checks the instance list against the existing names.
7957
7958     """
7959     self.force = self.op.force
7960
7961     # checking the new params on the primary/secondary nodes
7962
7963     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7964     cluster = self.cluster = self.cfg.GetClusterInfo()
7965     assert self.instance is not None, \
7966       "Cannot retrieve locked instance %s" % self.op.instance_name
7967     pnode = instance.primary_node
7968     nodelist = list(instance.all_nodes)
7969
7970     if self.op.disk_template:
7971       if instance.disk_template == self.op.disk_template:
7972         raise errors.OpPrereqError("Instance already has disk template %s" %
7973                                    instance.disk_template, errors.ECODE_INVAL)
7974
7975       if (instance.disk_template,
7976           self.op.disk_template) not in self._DISK_CONVERSIONS:
7977         raise errors.OpPrereqError("Unsupported disk template conversion from"
7978                                    " %s to %s" % (instance.disk_template,
7979                                                   self.op.disk_template),
7980                                    errors.ECODE_INVAL)
7981       if self.op.disk_template in constants.DTS_NET_MIRROR:
7982         _CheckNodeOnline(self, self.op.remote_node)
7983         _CheckNodeNotDrained(self, self.op.remote_node)
7984         disks = [{"size": d.size} for d in instance.disks]
7985         required = _ComputeDiskSize(self.op.disk_template, disks)
7986         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
7987         _CheckInstanceDown(self, instance, "cannot change disk template")
7988
7989     # hvparams processing
7990     if self.op.hvparams:
7991       i_hvdict, hv_new = self._GetUpdatedParams(
7992                              instance.hvparams, self.op.hvparams,
7993                              cluster.hvparams[instance.hypervisor],
7994                              constants.HVS_PARAMETER_TYPES)
7995       # local check
7996       hypervisor.GetHypervisor(
7997         instance.hypervisor).CheckParameterSyntax(hv_new)
7998       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7999       self.hv_new = hv_new # the new actual values
8000       self.hv_inst = i_hvdict # the new dict (without defaults)
8001     else:
8002       self.hv_new = self.hv_inst = {}
8003
8004     # beparams processing
8005     if self.op.beparams:
8006       i_bedict, be_new = self._GetUpdatedParams(
8007                              instance.beparams, self.op.beparams,
8008                              cluster.beparams[constants.PP_DEFAULT],
8009                              constants.BES_PARAMETER_TYPES)
8010       self.be_new = be_new # the new actual values
8011       self.be_inst = i_bedict # the new dict (without defaults)
8012     else:
8013       self.be_new = self.be_inst = {}
8014
8015     self.warn = []
8016
8017     if constants.BE_MEMORY in self.op.beparams and not self.force:
8018       mem_check_list = [pnode]
8019       if be_new[constants.BE_AUTO_BALANCE]:
8020         # either we changed auto_balance to yes or it was from before
8021         mem_check_list.extend(instance.secondary_nodes)
8022       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8023                                                   instance.hypervisor)
8024       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8025                                          instance.hypervisor)
8026       pninfo = nodeinfo[pnode]
8027       msg = pninfo.fail_msg
8028       if msg:
8029         # Assume the primary node is unreachable and go ahead
8030         self.warn.append("Can't get info from primary node %s: %s" %
8031                          (pnode,  msg))
8032       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8033         self.warn.append("Node data from primary node %s doesn't contain"
8034                          " free memory information" % pnode)
8035       elif instance_info.fail_msg:
8036         self.warn.append("Can't get instance runtime information: %s" %
8037                         instance_info.fail_msg)
8038       else:
8039         if instance_info.payload:
8040           current_mem = int(instance_info.payload['memory'])
8041         else:
8042           # Assume instance not running
8043           # (there is a slight race condition here, but it's not very probable,
8044           # and we have no other way to check)
8045           current_mem = 0
8046         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8047                     pninfo.payload['memory_free'])
8048         if miss_mem > 0:
8049           raise errors.OpPrereqError("This change will prevent the instance"
8050                                      " from starting, due to %d MB of memory"
8051                                      " missing on its primary node" % miss_mem,
8052                                      errors.ECODE_NORES)
8053
8054       if be_new[constants.BE_AUTO_BALANCE]:
8055         for node, nres in nodeinfo.items():
8056           if node not in instance.secondary_nodes:
8057             continue
8058           msg = nres.fail_msg
8059           if msg:
8060             self.warn.append("Can't get info from secondary node %s: %s" %
8061                              (node, msg))
8062           elif not isinstance(nres.payload.get('memory_free', None), int):
8063             self.warn.append("Secondary node %s didn't return free"
8064                              " memory information" % node)
8065           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8066             self.warn.append("Not enough memory to failover instance to"
8067                              " secondary node %s" % node)
8068
8069     # NIC processing
8070     self.nic_pnew = {}
8071     self.nic_pinst = {}
8072     for nic_op, nic_dict in self.op.nics:
8073       if nic_op == constants.DDM_REMOVE:
8074         if not instance.nics:
8075           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8076                                      errors.ECODE_INVAL)
8077         continue
8078       if nic_op != constants.DDM_ADD:
8079         # an existing nic
8080         if not instance.nics:
8081           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8082                                      " no NICs" % nic_op,
8083                                      errors.ECODE_INVAL)
8084         if nic_op < 0 or nic_op >= len(instance.nics):
8085           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8086                                      " are 0 to %d" %
8087                                      (nic_op, len(instance.nics) - 1),
8088                                      errors.ECODE_INVAL)
8089         old_nic_params = instance.nics[nic_op].nicparams
8090         old_nic_ip = instance.nics[nic_op].ip
8091       else:
8092         old_nic_params = {}
8093         old_nic_ip = None
8094
8095       update_params_dict = dict([(key, nic_dict[key])
8096                                  for key in constants.NICS_PARAMETERS
8097                                  if key in nic_dict])
8098
8099       if 'bridge' in nic_dict:
8100         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8101
8102       new_nic_params, new_filled_nic_params = \
8103           self._GetUpdatedParams(old_nic_params, update_params_dict,
8104                                  cluster.nicparams[constants.PP_DEFAULT],
8105                                  constants.NICS_PARAMETER_TYPES)
8106       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8107       self.nic_pinst[nic_op] = new_nic_params
8108       self.nic_pnew[nic_op] = new_filled_nic_params
8109       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8110
8111       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8112         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8113         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8114         if msg:
8115           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8116           if self.force:
8117             self.warn.append(msg)
8118           else:
8119             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8120       if new_nic_mode == constants.NIC_MODE_ROUTED:
8121         if 'ip' in nic_dict:
8122           nic_ip = nic_dict['ip']
8123         else:
8124           nic_ip = old_nic_ip
8125         if nic_ip is None:
8126           raise errors.OpPrereqError('Cannot set the nic ip to None'
8127                                      ' on a routed nic', errors.ECODE_INVAL)
8128       if 'mac' in nic_dict:
8129         nic_mac = nic_dict['mac']
8130         if nic_mac is None:
8131           raise errors.OpPrereqError('Cannot set the nic mac to None',
8132                                      errors.ECODE_INVAL)
8133         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8134           # otherwise generate the mac
8135           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8136         else:
8137           # or validate/reserve the current one
8138           try:
8139             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8140           except errors.ReservationError:
8141             raise errors.OpPrereqError("MAC address %s already in use"
8142                                        " in cluster" % nic_mac,
8143                                        errors.ECODE_NOTUNIQUE)
8144
8145     # DISK processing
8146     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8147       raise errors.OpPrereqError("Disk operations not supported for"
8148                                  " diskless instances",
8149                                  errors.ECODE_INVAL)
8150     for disk_op, _ in self.op.disks:
8151       if disk_op == constants.DDM_REMOVE:
8152         if len(instance.disks) == 1:
8153           raise errors.OpPrereqError("Cannot remove the last disk of"
8154                                      " an instance", errors.ECODE_INVAL)
8155         _CheckInstanceDown(self, instance, "cannot remove disks")
8156
8157       if (disk_op == constants.DDM_ADD and
8158           len(instance.nics) >= constants.MAX_DISKS):
8159         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8160                                    " add more" % constants.MAX_DISKS,
8161                                    errors.ECODE_STATE)
8162       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8163         # an existing disk
8164         if disk_op < 0 or disk_op >= len(instance.disks):
8165           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8166                                      " are 0 to %d" %
8167                                      (disk_op, len(instance.disks)),
8168                                      errors.ECODE_INVAL)
8169
8170     return
8171
8172   def _ConvertPlainToDrbd(self, feedback_fn):
8173     """Converts an instance from plain to drbd.
8174
8175     """
8176     feedback_fn("Converting template to drbd")
8177     instance = self.instance
8178     pnode = instance.primary_node
8179     snode = self.op.remote_node
8180
8181     # create a fake disk info for _GenerateDiskTemplate
8182     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8183     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8184                                       instance.name, pnode, [snode],
8185                                       disk_info, None, None, 0)
8186     info = _GetInstanceInfoText(instance)
8187     feedback_fn("Creating aditional volumes...")
8188     # first, create the missing data and meta devices
8189     for disk in new_disks:
8190       # unfortunately this is... not too nice
8191       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8192                             info, True)
8193       for child in disk.children:
8194         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8195     # at this stage, all new LVs have been created, we can rename the
8196     # old ones
8197     feedback_fn("Renaming original volumes...")
8198     rename_list = [(o, n.children[0].logical_id)
8199                    for (o, n) in zip(instance.disks, new_disks)]
8200     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8201     result.Raise("Failed to rename original LVs")
8202
8203     feedback_fn("Initializing DRBD devices...")
8204     # all child devices are in place, we can now create the DRBD devices
8205     for disk in new_disks:
8206       for node in [pnode, snode]:
8207         f_create = node == pnode
8208         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8209
8210     # at this point, the instance has been modified
8211     instance.disk_template = constants.DT_DRBD8
8212     instance.disks = new_disks
8213     self.cfg.Update(instance, feedback_fn)
8214
8215     # disks are created, waiting for sync
8216     disk_abort = not _WaitForSync(self, instance)
8217     if disk_abort:
8218       raise errors.OpExecError("There are some degraded disks for"
8219                                " this instance, please cleanup manually")
8220
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk add/remove/modify, disk
    template conversion, NIC changes, hypervisor parameters and backend
    parameters; the instance is then written back to the configuration.

    @param feedback_fn: function used to report warnings and progress
    @rtype: list
    @return: list of (parameter name, new value or action) pairs, one
        for each change that was made
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the new length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal failures are not fatal, they are only logged
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # reuse the driver and the directory of the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are only logged, the disk stays in the config
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    # disk template conversion
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # on any failure release the DRBD minors of the instance, then
        # re-raise the original exception
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        # after the removal, the new length equals the removed NIC's index
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        # the NIC object stores the parameters without defaults...
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        # ...while the filled parameters are used for reporting
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # change an existing nic
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      # store the dict without defaults, report the values as requested
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      # store the dict without defaults, report the values as requested
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result
8342
  # Supported disk template conversions: maps an (old template, new template)
  # pair to the function doing the conversion, which is called with the LU
  # and the feedback function as arguments
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    }
8346
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared locks on the requested nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      # no nodes given means all nodes
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the list of nodes that were actually locked.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node, or False if the node could not be queried.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload
    return exports
8387
8388
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode and defaults to
    constants.DEFAULT_SHUTDOWN_TIMEOUT.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: (env, pre-hook node list, post-hook node list)

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary node is online, that the target node is online and not
    drained, and that the instance has no file-based disks.

    @raise errors.OpPrereqError: if the instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The disks are snapshotted on the primary node (with the instance
    optionally shut down while the snapshots are taken), the snapshots
    are exported to the target node and then removed, the export is
    finalized, and older exports of the instance on the other nodes are
    removed.

    @param feedback_fn: function used to report progress
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu is a boolean telling
        whether the export was finalized successfully and dresults is
        a list of booleans, one per disk, telling whether that disk
        was exported successfully
    @raise errors.OpExecError: if the instance cannot be restarted after
        the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot device, or False if the
    # snapshot could not be created
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        # restart the instance if we shut it down above and it is marked
        # as up; this runs even if taking the snapshots raised an error
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          # the snapshot is removed whether or not the export succeeded
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          # no snapshot was created for this disk
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      # undo the disk activation done above for a stopped instance
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # all nodes except the target node are searched for older exports
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        # nodes which cannot be queried are silently skipped
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
8585
8586
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all the nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their export list and the export of
    the instance is removed from every node that has one.

    @param feedback_fn: function used to tell the user when no export
        was found for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unresolved = not expanded_name
    if name_unresolved:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    node_exports = self.rpc.call_export_list(
      self.acquired_locks[locking.LEVEL_NODE])
    found = False
    for node in node_exports:
      nres = node_exports[node]
      query_err = nres.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if instance_name not in nres.payload:
        continue
      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8638
8639
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract parent of the other tags LUs: it takes care of the locking
  and of looking up the object (cluster, node or instance) being tagged.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object.

    Only nodes and instances are expanded and locked; for any other
    kind no lock is requested.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      level = locking.LEVEL_NODE
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      level = locking.LEVEL_INSTANCE
    else:
      return

    self.op.name = full_name
    self.needed_locks[level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the object the tags belong to and saves it in self.target;
    an unknown kind is rejected with an OpPrereqError.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
8669
8670
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a new list.

    """
    tags = self.target.GetTags()
    return list(tags)
8683
8684
8685 class LUSearchTags(NoHooksLU):
8686   """Searches the tags for a given pattern.
8687
8688   """
8689   _OP_REQP = ["pattern"]
8690   REQ_BGL = False
8691
8692   def ExpandNames(self):
8693     self.needed_locks = {}
8694
8695   def CheckPrereq(self):
8696     """Check prerequisites.
8697
8698     This checks the pattern passed for validity by compiling it.
8699
8700     """
8701     try:
8702       self.re = re.compile(self.op.pattern)
8703     except re.error, err:
8704       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8705                                  (self.op.pattern, err), errors.ECODE_INVAL)
8706
8707   def Exec(self, feedback_fn):
8708     """Returns the tag list.
8709
8710     """
8711     cfg = self.cfg
8712     tgts = [("/cluster", cfg.GetClusterInfo())]
8713     ilist = cfg.GetAllInstancesInfo().values()
8714     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8715     nlist = cfg.GetAllNodesInfo().values()
8716     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8717     results = []
8718     for path, target in tgts:
8719       for tag in target.GetTags():
8720         if self.re.search(tag):
8721           results.append((path, tag))
8722     return results
8723
8724
8725 class LUAddTags(TagsLU):
8726   """Sets a tag on a given object.
8727
8728   """
8729   _OP_REQP = ["kind", "name", "tags"]
8730   REQ_BGL = False
8731
8732   def CheckPrereq(self):
8733     """Check prerequisites.
8734
8735     This checks the type and length of the tag name and value.
8736
8737     """
8738     TagsLU.CheckPrereq(self)
8739     for tag in self.op.tags:
8740       objects.TaggableObject.ValidateTag(tag)
8741
8742   def Exec(self, feedback_fn):
8743     """Sets the tag.
8744
8745     """
8746     try:
8747       for tag in self.op.tags:
8748         self.target.AddTag(tag)
8749     except errors.TagError, err:
8750       raise errors.OpExecError("Error while setting tag: %s" % str(err))
8751     self.cfg.Update(self.target, feedback_fn)
8752
8753
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Besides validating the tags, this makes sure all of them are
    currently set on the target; the missing ones are reported, sorted
    and quoted, in an OpPrereqError.

    """
    TagsLU.CheckPrereq(self)
    requested = self.op.tags
    for tag in requested:
      objects.TaggableObject.ValidateTag(tag)

    # whatever is left after subtracting the current tags is not there
    missing = frozenset(requested) - self.target.GetTags()
    if missing:
      quoted = sorted(["'%s'" % tag for tag in missing])
      raise errors.OpPrereqError("Tag(s) %s not found" % ",".join(quoted),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save it to the configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
8786
8787
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  Depending on the opcode, the sleep is done on the master, on the
  given nodes, or on both.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, when given, is expanded and the nodes are locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted_nodes
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def CheckPrereq(self):
    """Check prerequisites; there is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    The master sleeps first (if requested); afterwards the delay is run
    on the nodes via RPC and every node result is checked for failure.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      answers = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node_name, answer in answers.items():
        answer.Raise("Failure during rpc call to node %s" % node_name)
8828
8829
8830 class IAllocator(object):
8831   """IAllocator framework.
8832
8833   An IAllocator instance has three sets of attributes:
8834     - cfg that is needed to query the cluster
8835     - input data (all members of the _KEYS class attribute are required)
8836     - four buffer attributes (in|out_data|text), that represent the
8837       input (to the external script) in text and data structure format,
8838       and the output from it, again in two formats
8839     - the result variables from the script (success, info, nodes) for
8840       easy usage
8841
8842   """
8843   # pylint: disable-msg=R0902
8844   # lots of instance attributes
8845   _ALLO_KEYS = [
8846     "name", "mem_size", "disks", "disk_template",
8847     "os", "tags", "nics", "vcpus", "hypervisor",
8848     ]
8849   _RELO_KEYS = [
8850     "name", "relocate_from",
8851     ]
8852   _EVAC_KEYS = [
8853     "evac_nodes",
8854     ]
8855
8856   def __init__(self, cfg, rpc, mode, **kwargs):
8857     self.cfg = cfg
8858     self.rpc = rpc
8859     # init buffer variables
8860     self.in_text = self.out_text = self.in_data = self.out_data = None
8861     # init all input fields so that pylint is happy
8862     self.mode = mode
8863     self.mem_size = self.disks = self.disk_template = None
8864     self.os = self.tags = self.nics = self.vcpus = None
8865     self.hypervisor = None
8866     self.relocate_from = None
8867     self.name = None
8868     self.evac_nodes = None
8869     # computed fields
8870     self.required_nodes = None
8871     # init result fields
8872     self.success = self.info = self.result = None
8873     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8874       keyset = self._ALLO_KEYS
8875       fn = self._AddNewInstance
8876     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8877       keyset = self._RELO_KEYS
8878       fn = self._AddRelocateInstance
8879     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8880       keyset = self._EVAC_KEYS
8881       fn = self._AddEvacuateNodes
8882     else:
8883       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8884                                    " IAllocator" % self.mode)
8885     for key in kwargs:
8886       if key not in keyset:
8887         raise errors.ProgrammerError("Invalid input parameter '%s' to"
8888                                      " IAllocator" % key)
8889       setattr(self, key, kwargs[key])
8890
8891     for key in keyset:
8892       if key not in kwargs:
8893         raise errors.ProgrammerError("Missing input parameter '%s' to"
8894                                      " IAllocator" % key)
8895     self._BuildInputData(fn)
8896
8897   def _ComputeClusterData(self):
8898     """Compute the generic allocator input data.
8899
8900     This is the data that is independent of the actual operation.
8901
8902     """
8903     cfg = self.cfg
8904     cluster_info = cfg.GetClusterInfo()
8905     # cluster data
8906     data = {
8907       "version": constants.IALLOCATOR_VERSION,
8908       "cluster_name": cfg.GetClusterName(),
8909       "cluster_tags": list(cluster_info.GetTags()),
8910       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8911       # we don't have job IDs
8912       }
8913     iinfo = cfg.GetAllInstancesInfo().values()
8914     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8915
8916     # node data
8917     node_results = {}
8918     node_list = cfg.GetNodeList()
8919
8920     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8921       hypervisor_name = self.hypervisor
8922     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8923       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8924     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8925       hypervisor_name = cluster_info.enabled_hypervisors[0]
8926
8927     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8928                                         hypervisor_name)
8929     node_iinfo = \
8930       self.rpc.call_all_instances_info(node_list,
8931                                        cluster_info.enabled_hypervisors)
8932     for nname, nresult in node_data.items():
8933       # first fill in static (config-based) values
8934       ninfo = cfg.GetNodeInfo(nname)
8935       pnr = {
8936         "tags": list(ninfo.GetTags()),
8937         "primary_ip": ninfo.primary_ip,
8938         "secondary_ip": ninfo.secondary_ip,
8939         "offline": ninfo.offline,
8940         "drained": ninfo.drained,
8941         "master_candidate": ninfo.master_candidate,
8942         }
8943
8944       if not (ninfo.offline or ninfo.drained):
8945         nresult.Raise("Can't get data for node %s" % nname)
8946         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8947                                 nname)
8948         remote_info = nresult.payload
8949
8950         for attr in ['memory_total', 'memory_free', 'memory_dom0',
8951                      'vg_size', 'vg_free', 'cpu_total']:
8952           if attr not in remote_info:
8953             raise errors.OpExecError("Node '%s' didn't return attribute"
8954                                      " '%s'" % (nname, attr))
8955           if not isinstance(remote_info[attr], int):
8956             raise errors.OpExecError("Node '%s' returned invalid value"
8957                                      " for '%s': %s" %
8958                                      (nname, attr, remote_info[attr]))
8959         # compute memory used by primary instances
8960         i_p_mem = i_p_up_mem = 0
8961         for iinfo, beinfo in i_list:
8962           if iinfo.primary_node == nname:
8963             i_p_mem += beinfo[constants.BE_MEMORY]
8964             if iinfo.name not in node_iinfo[nname].payload:
8965               i_used_mem = 0
8966             else:
8967               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8968             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8969             remote_info['memory_free'] -= max(0, i_mem_diff)
8970
8971             if iinfo.admin_up:
8972               i_p_up_mem += beinfo[constants.BE_MEMORY]
8973
8974         # compute memory used by instances
8975         pnr_dyn = {
8976           "total_memory": remote_info['memory_total'],
8977           "reserved_memory": remote_info['memory_dom0'],
8978           "free_memory": remote_info['memory_free'],
8979           "total_disk": remote_info['vg_size'],
8980           "free_disk": remote_info['vg_free'],
8981           "total_cpus": remote_info['cpu_total'],
8982           "i_pri_memory": i_p_mem,
8983           "i_pri_up_memory": i_p_up_mem,
8984           }
8985         pnr.update(pnr_dyn)
8986
8987       node_results[nname] = pnr
8988     data["nodes"] = node_results
8989
8990     # instance data
8991     instance_data = {}
8992     for iinfo, beinfo in i_list:
8993       nic_data = []
8994       for nic in iinfo.nics:
8995         filled_params = objects.FillDict(
8996             cluster_info.nicparams[constants.PP_DEFAULT],
8997             nic.nicparams)
8998         nic_dict = {"mac": nic.mac,
8999                     "ip": nic.ip,
9000                     "mode": filled_params[constants.NIC_MODE],
9001                     "link": filled_params[constants.NIC_LINK],
9002                    }
9003         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9004           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9005         nic_data.append(nic_dict)
9006       pir = {
9007         "tags": list(iinfo.GetTags()),
9008         "admin_up": iinfo.admin_up,
9009         "vcpus": beinfo[constants.BE_VCPUS],
9010         "memory": beinfo[constants.BE_MEMORY],
9011         "os": iinfo.os,
9012         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9013         "nics": nic_data,
9014         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9015         "disk_template": iinfo.disk_template,
9016         "hypervisor": iinfo.hypervisor,
9017         }
9018       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9019                                                  pir["disks"])
9020       instance_data[iinfo.name] = pir
9021
9022     data["instances"] = instance_data
9023
9024     self.in_data = data
9025
9026   def _AddNewInstance(self):
9027     """Add new instance data to allocator structure.
9028
9029     This in combination with _AllocatorGetClusterData will create the
9030     correct structure needed as input for the allocator.
9031
9032     The checks for the completeness of the opcode must have already been
9033     done.
9034
9035     """
9036     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9037
9038     if self.disk_template in constants.DTS_NET_MIRROR:
9039       self.required_nodes = 2
9040     else:
9041       self.required_nodes = 1
9042     request = {
9043       "name": self.name,
9044       "disk_template": self.disk_template,
9045       "tags": self.tags,
9046       "os": self.os,
9047       "vcpus": self.vcpus,
9048       "memory": self.mem_size,
9049       "disks": self.disks,
9050       "disk_space_total": disk_space,
9051       "nics": self.nics,
9052       "required_nodes": self.required_nodes,
9053       }
9054     return request
9055
9056   def _AddRelocateInstance(self):
9057     """Add relocate instance data to allocator structure.
9058
9059     This in combination with _IAllocatorGetClusterData will create the
9060     correct structure needed as input for the allocator.
9061
9062     The checks for the completeness of the opcode must have already been
9063     done.
9064
9065     """
9066     instance = self.cfg.GetInstanceInfo(self.name)
9067     if instance is None:
9068       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9069                                    " IAllocator" % self.name)
9070
9071     if instance.disk_template not in constants.DTS_NET_MIRROR:
9072       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9073                                  errors.ECODE_INVAL)
9074
9075     if len(instance.secondary_nodes) != 1:
9076       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9077                                  errors.ECODE_STATE)
9078
9079     self.required_nodes = 1
9080     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9081     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9082
9083     request = {
9084       "name": self.name,
9085       "disk_space_total": disk_space,
9086       "required_nodes": self.required_nodes,
9087       "relocate_from": self.relocate_from,
9088       }
9089     return request
9090
9091   def _AddEvacuateNodes(self):
9092     """Add evacuate nodes data to allocator structure.
9093
9094     """
9095     request = {
9096       "evac_nodes": self.evac_nodes
9097       }
9098     return request
9099
9100   def _BuildInputData(self, fn):
9101     """Build input data structures.
9102
9103     """
9104     self._ComputeClusterData()
9105
9106     request = fn()
9107     request["type"] = self.mode
9108     self.in_data["request"] = request
9109
9110     self.in_text = serializer.Dump(self.in_data)
9111
9112   def Run(self, name, validate=True, call_fn=None):
9113     """Run an instance allocator and return the results.
9114
9115     """
9116     if call_fn is None:
9117       call_fn = self.rpc.call_iallocator_runner
9118
9119     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9120     result.Raise("Failure while running the iallocator script")
9121
9122     self.out_text = result.payload
9123     if validate:
9124       self._ValidateResult()
9125
9126   def _ValidateResult(self):
9127     """Process the allocator results.
9128
9129     This will process and if successful save the result in
9130     self.out_data and the other parameters.
9131
9132     """
9133     try:
9134       rdict = serializer.Load(self.out_text)
9135     except Exception, err:
9136       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9137
9138     if not isinstance(rdict, dict):
9139       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9140
9141     # TODO: remove backwards compatiblity in later versions
9142     if "nodes" in rdict and "result" not in rdict:
9143       rdict["result"] = rdict["nodes"]
9144       del rdict["nodes"]
9145
9146     for key in "success", "info", "result":
9147       if key not in rdict:
9148         raise errors.OpExecError("Can't parse iallocator results:"
9149                                  " missing key '%s'" % key)
9150       setattr(self, key, rdict[key])
9151
9152     if not isinstance(rdict["result"], list):
9153       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9154                                " is not a list")
9155     self.out_data = rdict
9156
9157
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the direction it either
  returns the input text built for the allocator, or it runs the
  allocator and returns the unvalidated output text.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    test.

    @raise errors.OpPrereqError: if a parameter needed by the mode or by
        the direction is missing or invalid

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test only makes sense for a not yet existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the relocation is always done from the current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the allocator input text for the "in" direction, otherwise
        the output text of the allocator run (which is not validated)
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode has to be interpolated here: passing it as a second
      # argument to the exception would leave the "%s" unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result