code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0613,W0201
  25
  26 import os
  27 import os.path
  28 import sha
  29 import time
  30 import tempfile
  31 import re
  32 import platform
  33 import logging
  34 import copy
  35 import random
  36
  37 from ganeti import ssh
  38 from ganeti import utils
  39 from ganeti import errors
  40 from ganeti import hypervisor
  41 from ganeti import locking
  42 from ganeti import constants
  43 from ganeti import objects
  44 from ganeti import opcodes
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47
  48
  49 class LogicalUnit(object):
  50   """Logical Unit base class.
  51
  52   Subclasses must follow these rules:
  53     - implement ExpandNames
  54     - implement CheckPrereq
  55     - implement Exec
  56     - implement BuildHooksEnv
  57     - redefine HPATH and HTYPE
  58     - optionally redefine their run requirements:
  59         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  60
  61   Note that all commands require root permissions.
  62
  63   """
  64   HPATH = None
  65   HTYPE = None
  66   _OP_REQP = []
  67   REQ_BGL = True
  68
  69   def __init__(self, processor, op, context, rpc):
  70     """Constructor for LogicalUnit.
  71
  72     This needs to be overriden in derived classes in order to check op
  73     validity.
  74
  75     """
  76     self.proc = processor
  77     self.op = op
  78     self.cfg = context.cfg
  79     self.context = context
  80     self.rpc = rpc
  81     # Dicts used to declare locking needs to mcpu
  82     self.needed_locks = None
  83     self.acquired_locks = {}
  84     self.share_locks = dict(((i, 0) for i in locking.LEVELS))
  85     self.add_locks = {}
  86     self.remove_locks = {}
  87     # Used to force good behavior when calling helper functions
  88     self.recalculate_locks = {}
  89     self.__ssh = None
  90     # logging
  91     self.LogWarning = processor.LogWarning
  92     self.LogInfo = processor.LogInfo
  93
  94     for attr_name in self._OP_REQP:
  95       attr_val = getattr(op, attr_name, None)
  96       if attr_val is None:
  97         raise errors.OpPrereqError("Required parameter '%s' missing" %
  98                                    attr_name)
  99     self.CheckArguments()
 100
 101   def __GetSSH(self):
 102     """Returns the SshRunner object
 103
 104     """
 105     if not self.__ssh:
 106       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 107     return self.__ssh
 108
 109   ssh = property(fget=__GetSSH)
 110
 111   def CheckArguments(self):
 112     """Check syntactic validity for the opcode arguments.
 113
 114     This method is for doing a simple syntactic check and ensure
 115     validity of opcode parameters, without any cluster-related
 116     checks. While the same can be accomplished in ExpandNames and/or
 117     CheckPrereq, doing these separate is better because:
 118
 119       - ExpandNames is left as as purely a lock-related function
 120       - CheckPrereq is run after we have aquired locks (and possible
 121         waited for them)
 122
 123     The function is allowed to change the self.op attribute so that
 124     later methods can no longer worry about missing parameters.
 125
 126     """
 127     pass
 128
 129   def ExpandNames(self):
 130     """Expand names for this LU.
 131
 132     This method is called before starting to execute the opcode, and it should
 133     update all the parameters of the opcode to their canonical form (e.g. a
 134     short node name must be fully expanded after this method has successfully
 135     completed). This way locking, hooks, logging, ecc. can work correctly.
 136
 137     LUs which implement this method must also populate the self.needed_locks
 138     member, as a dict with lock levels as keys, and a list of needed lock names
 139     as values. Rules:
 140
 141       - use an empty dict if you don't need any lock
 142       - if you don't need any lock at a particular level omit that level
 143       - don't put anything for the BGL level
 144       - if you want all locks at a level use locking.ALL_SET as a value
 145
 146     If you need to share locks (rather than acquire them exclusively) at one
 147     level you can modify self.share_locks, setting a true value (usually 1) for
 148     that level. By default locks are not shared.
 149
 150     Examples::
 151
 152       # Acquire all nodes and one instance
 153       self.needed_locks = {
 154         locking.LEVEL_NODE: locking.ALL_SET,
 155         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 156       }
 157       # Acquire just two nodes
 158       self.needed_locks = {
 159         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 160       }
 161       # Acquire no locks
 162       self.needed_locks = {} # No, you can't leave it to the default value None
 163
 164     """
 165     # The implementation of this method is mandatory only if the new LU is
 166     # concurrent, so that old LUs don't need to be changed all at the same
 167     # time.
 168     if self.REQ_BGL:
 169       self.needed_locks = {} # Exclusive LUs don't need locks.
 170     else:
 171       raise NotImplementedError
 172
 173   def DeclareLocks(self, level):
 174     """Declare LU locking needs for a level
 175
 176     While most LUs can just declare their locking needs at ExpandNames time,
 177     sometimes there's the need to calculate some locks after having acquired
 178     the ones before. This function is called just before acquiring locks at a
 179     particular level, but after acquiring the ones at lower levels, and permits
 180     such calculations. It can be used to modify self.needed_locks, and by
 181     default it does nothing.
 182
 183     This function is only called if you have something already set in
 184     self.needed_locks for the level.
 185
 186     @param level: Locking level which is going to be locked
 187     @type level: member of ganeti.locking.LEVELS
 188
 189     """
 190
 191   def CheckPrereq(self):
 192     """Check prerequisites for this LU.
 193
 194     This method should check that the prerequisites for the execution
 195     of this LU are fulfilled. It can do internode communication, but
 196     it should be idempotent - no cluster or system changes are
 197     allowed.
 198
 199     The method should raise errors.OpPrereqError in case something is
 200     not fulfilled. Its return value is ignored.
 201
 202     This method should also update all the parameters of the opcode to
 203     their canonical form if it hasn't been done by ExpandNames before.
 204
 205     """
 206     raise NotImplementedError
 207
 208   def Exec(self, feedback_fn):
 209     """Execute the LU.
 210
 211     This method should implement the actual work. It should raise
 212     errors.OpExecError for failures that are somewhat dealt with in
 213     code, or expected.
 214
 215     """
 216     raise NotImplementedError
 217
 218   def BuildHooksEnv(self):
 219     """Build hooks environment for this LU.
 220
 221     This method should return a three-node tuple consisting of: a dict
 222     containing the environment that will be used for running the
 223     specific hook for this LU, a list of node names on which the hook
 224     should run before the execution, and a list of node names on which
 225     the hook should run after the execution.
 226
 227     The keys of the dict must not have 'GANETI_' prefixed as this will
 228     be handled in the hooks runner. Also note additional keys will be
 229     added by the hooks runner. If the LU doesn't define any
 230     environment, an empty dict (and not None) should be returned.
 231
 232     No nodes should be returned as an empty list (and not None).
 233
 234     Note that if the HPATH for a LU class is None, this function will
 235     not be called.
 236
 237     """
 238     raise NotImplementedError
 239
 240   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 241     """Notify the LU about the results of its hooks.
 242
 243     This method is called every time a hooks phase is executed, and notifies
 244     the Logical Unit about the hooks' result. The LU can then use it to alter
 245     its result based on the hooks.  By default the method does nothing and the
 246     previous result is passed back unchanged but any LU can define it if it
 247     wants to use the local cluster hook-scripts somehow.
 248
 249     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 250         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 251     @param hook_results: the results of the multi-node hooks rpc call
 252     @param feedback_fn: function used send feedback back to the caller
 253     @param lu_result: the previous Exec result this LU had, or None
 254         in the PRE phase
 255     @return: the new Exec result, based on the previous result
 256         and hook results
 257
 258     """
 259     return lu_result
 260
 261   def _ExpandAndLockInstance(self):
 262     """Helper function to expand and lock an instance.
 263
 264     Many LUs that work on an instance take its name in self.op.instance_name
 265     and need to expand it and then declare the expanded name for locking. This
 266     function does it, and then updates self.op.instance_name to the expanded
 267     name. It also initializes needed_locks as a dict, if this hasn't been done
 268     before.
 269
 270     """
 271     if self.needed_locks is None:
 272       self.needed_locks = {}
 273     else:
 274       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 275         "_ExpandAndLockInstance called with instance-level locks set"
 276     expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
 277     if expanded_name is None:
 278       raise errors.OpPrereqError("Instance '%s' not known" %
 279                                   self.op.instance_name)
 280     self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
 281     self.op.instance_name = expanded_name
 282
 283   def _LockInstancesNodes(self, primary_only=False):
 284     """Helper function to declare instances' nodes for locking.
 285
 286     This function should be called after locking one or more instances to lock
 287     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 288     with all primary or secondary nodes for instances already locked and
 289     present in self.needed_locks[locking.LEVEL_INSTANCE].
 290
 291     It should be called from DeclareLocks, and for safety only works if
 292     self.recalculate_locks[locking.LEVEL_NODE] is set.
 293
 294     In the future it may grow parameters to just lock some instance's nodes, or
 295     to just lock primaries or secondary nodes, if needed.
 296
 297     If should be called in DeclareLocks in a way similar to::
 298
 299       if level == locking.LEVEL_NODE:
 300         self._LockInstancesNodes()
 301
 302     @type primary_only: boolean
 303     @param primary_only: only lock primary nodes of locked instances
 304
 305     """
 306     assert locking.LEVEL_NODE in self.recalculate_locks, \
 307       "_LockInstancesNodes helper function called with no nodes to recalculate"
 308
 309     # TODO: check if we're really been called with the instance locks held
 310
 311     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 312     # future we might want to have different behaviors depending on the value
 313     # of self.recalculate_locks[locking.LEVEL_NODE]
 314     wanted_nodes = []
 315     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 316       instance = self.context.cfg.GetInstanceInfo(instance_name)
 317       wanted_nodes.append(instance.primary_node)
 318       if not primary_only:
 319         wanted_nodes.extend(instance.secondary_nodes)
 320
 321     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 322       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 323     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 324       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 325
 326     del self.recalculate_locks[locking.LEVEL_NODE]
 327
 328
 329 class NoHooksLU(LogicalUnit):
 330   """Simple LU which runs no hooks.
 331
 332   This LU is intended as a parent for other LogicalUnits which will
 333   run no hooks, in order to reduce duplicate code.
 334
 335   """
 336   HPATH = None
 337   HTYPE = None
 338
 339
 340 def _GetWantedNodes(lu, nodes):
 341   """Returns list of checked and expanded node names.
 342
 343   @type lu: L{LogicalUnit}
 344   @param lu: the logical unit on whose behalf we execute
 345   @type nodes: list
 346   @param nodes: list of node names or None for all nodes
 347   @rtype: list
 348   @return: the list of nodes, sorted
 349   @raise errors.OpProgrammerError: if the nodes parameter is wrong type
 350
 351   """
 352   if not isinstance(nodes, list):
 353     raise errors.OpPrereqError("Invalid argument type 'nodes'")
 354
 355   if not nodes:
 356     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 357       " non-empty list of nodes whose name is to be expanded.")
 358
 359   wanted = []
 360   for name in nodes:
 361     node = lu.cfg.ExpandNodeName(name)
 362     if node is None:
 363       raise errors.OpPrereqError("No such node name '%s'" % name)
 364     wanted.append(node)
 365
 366   return utils.NiceSort(wanted)
 367
 368
 369 def _GetWantedInstances(lu, instances):
 370   """Returns list of checked and expanded instance names.
 371
 372   @type lu: L{LogicalUnit}
 373   @param lu: the logical unit on whose behalf we execute
 374   @type instances: list
 375   @param instances: list of instance names or None for all instances
 376   @rtype: list
 377   @return: the list of instances, sorted
 378   @raise errors.OpPrereqError: if the instances parameter is wrong type
 379   @raise errors.OpPrereqError: if any of the passed instances is not found
 380
 381   """
 382   if not isinstance(instances, list):
 383     raise errors.OpPrereqError("Invalid argument type 'instances'")
 384
 385   if instances:
 386     wanted = []
 387
 388     for name in instances:
 389       instance = lu.cfg.ExpandInstanceName(name)
 390       if instance is None:
 391         raise errors.OpPrereqError("No such instance name '%s'" % name)
 392       wanted.append(instance)
 393
 394   else:
 395     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 396   return wanted
 397
 398
 399 def _CheckOutputFields(static, dynamic, selected):
 400   """Checks whether all selected fields are valid.
 401
 402   @type static: L{utils.FieldSet}
 403   @param static: static fields set
 404   @type dynamic: L{utils.FieldSet}
 405   @param dynamic: dynamic fields set
 406
 407   """
 408   f = utils.FieldSet()
 409   f.Extend(static)
 410   f.Extend(dynamic)
 411
 412   delta = f.NonMatching(selected)
 413   if delta:
 414     raise errors.OpPrereqError("Unknown output fields selected: %s"
 415                                % ",".join(delta))
 416
 417
 418 def _CheckBooleanOpField(op, name):
 419   """Validates boolean opcode parameters.
 420
 421   This will ensure that an opcode parameter is either a boolean value,
 422   or None (but that it always exists).
 423
 424   """
 425   val = getattr(op, name, None)
 426   if not (val is None or isinstance(val, bool)):
 427     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 428                                (name, str(val)))
 429   setattr(op, name, val)
 430
 431
 432 def _CheckNodeOnline(lu, node):
 433   """Ensure that a given node is online.
 434
 435   @param lu: the LU on behalf of which we make the check
 436   @param node: the node to check
 437   @raise errors.OpPrereqError: if the node is offline
 438
 439   """
 440   if lu.cfg.GetNodeInfo(node).offline:
 441     raise errors.OpPrereqError("Can't use offline node %s" % node)
 442
 443
 444 def _CheckNodeNotDrained(lu, node):
 445   """Ensure that a given node is not drained.
 446
 447   @param lu: the LU on behalf of which we make the check
 448   @param node: the node to check
 449   @raise errors.OpPrereqError: if the node is drained
 450
 451   """
 452   if lu.cfg.GetNodeInfo(node).drained:
 453     raise errors.OpPrereqError("Can't use drained node %s" % node)
 454
 455
 456 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 457                           memory, vcpus, nics, disk_template, disks):
 458   """Builds instance related env variables for hooks
 459
 460   This builds the hook environment from individual variables.
 461
 462   @type name: string
 463   @param name: the name of the instance
 464   @type primary_node: string
 465   @param primary_node: the name of the instance's primary node
 466   @type secondary_nodes: list
 467   @param secondary_nodes: list of secondary nodes as strings
 468   @type os_type: string
 469   @param os_type: the name of the instance's OS
 470   @type status: boolean
 471   @param status: the should_run status of the instance
 472   @type memory: string
 473   @param memory: the memory size of the instance
 474   @type vcpus: string
 475   @param vcpus: the count of VCPUs the instance has
 476   @type nics: list
 477   @param nics: list of tuples (ip, bridge, mac) representing
 478       the NICs the instance  has
 479   @type disk_template: string
 480   @param disk_template: the distk template of the instance
 481   @type disks: list
 482   @param disks: the list of (size, mode) pairs
 483   @rtype: dict
 484   @return: the hook environment for this instance
 485
 486   """
 487   if status:
 488     str_status = "up"
 489   else:
 490     str_status = "down"
 491   env = {
 492     "OP_TARGET": name,
 493     "INSTANCE_NAME": name,
 494     "INSTANCE_PRIMARY": primary_node,
 495     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 496     "INSTANCE_OS_TYPE": os_type,
 497     "INSTANCE_STATUS": str_status,
 498     "INSTANCE_MEMORY": memory,
 499     "INSTANCE_VCPUS": vcpus,
 500     "INSTANCE_DISK_TEMPLATE": disk_template,
 501   }
 502
 503   if nics:
 504     nic_count = len(nics)
 505     for idx, (ip, bridge, mac) in enumerate(nics):
 506       if ip is None:
 507         ip = ""
 508       env["INSTANCE_NIC%d_IP" % idx] = ip
 509       env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
 510       env["INSTANCE_NIC%d_MAC" % idx] = mac
 511   else:
 512     nic_count = 0
 513
 514   env["INSTANCE_NIC_COUNT"] = nic_count
 515
 516   if disks:
 517     disk_count = len(disks)
 518     for idx, (size, mode) in enumerate(disks):
 519       env["INSTANCE_DISK%d_SIZE" % idx] = size
 520       env["INSTANCE_DISK%d_MODE" % idx] = mode
 521   else:
 522     disk_count = 0
 523
 524   env["INSTANCE_DISK_COUNT"] = disk_count
 525
 526   return env
 527
 528
 529 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 530   """Builds instance related env variables for hooks from an object.
 531
 532   @type lu: L{LogicalUnit}
 533   @param lu: the logical unit on whose behalf we execute
 534   @type instance: L{objects.Instance}
 535   @param instance: the instance for which we should build the
 536       environment
 537   @type override: dict
 538   @param override: dictionary with key/values that will override
 539       our values
 540   @rtype: dict
 541   @return: the hook environment dictionary
 542
 543   """
 544   bep = lu.cfg.GetClusterInfo().FillBE(instance)
 545   args = {
 546     'name': instance.name,
 547     'primary_node': instance.primary_node,
 548     'secondary_nodes': instance.secondary_nodes,
 549     'os_type': instance.os,
 550     'status': instance.admin_up,
 551     'memory': bep[constants.BE_MEMORY],
 552     'vcpus': bep[constants.BE_VCPUS],
 553     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
 554     'disk_template': instance.disk_template,
 555     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 556   }
 557   if override:
 558     args.update(override)
 559   return _BuildInstanceHookEnv(**args)
 560
 561
 562 def _AdjustCandidatePool(lu):
 563   """Adjust the candidate pool after node operations.
 564
 565   """
 566   mod_list = lu.cfg.MaintainCandidatePool()
 567   if mod_list:
 568     lu.LogInfo("Promoted nodes to master candidate role: %s",
 569                ", ".join(node.name for node in mod_list))
 570     for name in mod_list:
 571       lu.context.ReaddNode(name)
 572   mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
 573   if mc_now > mc_max:
 574     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 575                (mc_now, mc_max))
 576
 577
 578 def _CheckInstanceBridgesExist(lu, instance):
 579   """Check that the brigdes needed by an instance exist.
 580
 581   """
 582   # check bridges existance
 583   brlist = [nic.bridge for nic in instance.nics]
 584   result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
 585   result.Raise()
 586   if not result.data:
 587     raise errors.OpPrereqError("One or more target bridges %s does not"
 588                                " exist on destination node '%s'" %
 589                                (brlist, instance.primary_node))
 590
 591
 592 class LUDestroyCluster(NoHooksLU):
 593   """Logical unit for destroying the cluster.
 594
 595   """
 596   _OP_REQP = []
 597
 598   def CheckPrereq(self):
 599     """Check prerequisites.
 600
 601     This checks whether the cluster is empty.
 602
 603     Any errors are signalled by raising errors.OpPrereqError.
 604
 605     """
 606     master = self.cfg.GetMasterNode()
 607
 608     nodelist = self.cfg.GetNodeList()
 609     if len(nodelist) != 1 or nodelist[0] != master:
 610       raise errors.OpPrereqError("There are still %d node(s) in"
 611                                  " this cluster." % (len(nodelist) - 1))
 612     instancelist = self.cfg.GetInstanceList()
 613     if instancelist:
 614       raise errors.OpPrereqError("There are still %d instance(s) in"
 615                                  " this cluster." % len(instancelist))
 616
 617   def Exec(self, feedback_fn):
 618     """Destroys the cluster.
 619
 620     """
 621     master = self.cfg.GetMasterNode()
 622     result = self.rpc.call_node_stop_master(master, False)
 623     result.Raise()
 624     if not result.data:
 625       raise errors.OpExecError("Could not disable the master role")
 626     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 627     utils.CreateBackup(priv_key)
 628     utils.CreateBackup(pub_key)
 629     return master
 630
 631
 632 class LUVerifyCluster(LogicalUnit):
 633   """Verifies the cluster status.
 634
 635   """
 636   HPATH = "cluster-verify"
 637   HTYPE = constants.HTYPE_CLUSTER
 638   _OP_REQP = ["skip_checks"]
 639   REQ_BGL = False
 640
 641   def ExpandNames(self):
 642     self.needed_locks = {
 643       locking.LEVEL_NODE: locking.ALL_SET,
 644       locking.LEVEL_INSTANCE: locking.ALL_SET,
 645     }
 646     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 647
 648   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
 649                   node_result, feedback_fn, master_files,
 650                   drbd_map, vg_name):
 651     """Run multiple tests against a node.
 652
 653     Test list:
 654
 655       - compares ganeti version
 656       - checks vg existance and size > 20G
 657       - checks config file checksum
 658       - checks ssh to other nodes
 659
 660     @type nodeinfo: L{objects.Node}
 661     @param nodeinfo: the node to check
 662     @param file_list: required list of files
 663     @param local_cksum: dictionary of local files and their checksums
 664     @param node_result: the results from the node
 665     @param feedback_fn: function used to accumulate results
 666     @param master_files: list of files that only masters should have
 667     @param drbd_map: the useddrbd minors for this node, in
 668         form of minor: (instance, must_exist) which correspond to instances
 669         and their running status
 670     @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
 671
 672     """
 673     node = nodeinfo.name
 674
 675     # main result, node_result should be a non-empty dict
 676     if not node_result or not isinstance(node_result, dict):
 677       feedback_fn("  - ERROR: unable to verify node %s." % (node,))
 678       return True
 679
 680     # compares ganeti version
 681     local_version = constants.PROTOCOL_VERSION
 682     remote_version = node_result.get('version', None)
 683     if not (remote_version and isinstance(remote_version, (list, tuple)) and
 684             len(remote_version) == 2):
 685       feedback_fn("  - ERROR: connection to %s failed" % (node))
 686       return True
 687
 688     if local_version != remote_version[0]:
 689       feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
 690                   " node %s %s" % (local_version, node, remote_version[0]))
 691       return True
 692
 693     # node seems compatible, we can actually try to look into its results
 694
 695     bad = False
 696
 697     # full package version
 698     if constants.RELEASE_VERSION != remote_version[1]:
 699       feedback_fn("  - WARNING: software version mismatch: master %s,"
 700                   " node %s %s" %
 701                   (constants.RELEASE_VERSION, node, remote_version[1]))
 702
 703     # checks vg existence and size > 20G
 704     if vg_name is not None:
 705       vglist = node_result.get(constants.NV_VGLIST, None)
 706       if not vglist:
 707         feedback_fn("  - ERROR: unable to check volume groups on node %s." %
 708                         (node,))
 709         bad = True
 710       else:
 711         vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
 712                                               constants.MIN_VG_SIZE)
 713         if vgstatus:
 714           feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
 715           bad = True
 716
 717     # checks config file checksum
 718
 719     remote_cksum = node_result.get(constants.NV_FILELIST, None)
 720     if not isinstance(remote_cksum, dict):
 721       bad = True
 722       feedback_fn("  - ERROR: node hasn't returned file checksum data")
 723     else:
 724       for file_name in file_list:
 725         node_is_mc = nodeinfo.master_candidate
 726         must_have_file = file_name not in master_files
 727         if file_name not in remote_cksum:
 728           if node_is_mc or must_have_file:
 729             bad = True
 730             feedback_fn("  - ERROR: file '%s' missing" % file_name)
 731         elif remote_cksum[file_name] != local_cksum[file_name]:
 732           if node_is_mc or must_have_file:
 733             bad = True
 734             feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
 735           else:
 736             # not candidate and this is not a must-have file
 737             bad = True
 738             feedback_fn("  - ERROR: non master-candidate has old/wrong file"
 739                         " '%s'" % file_name)
 740         else:
 741           # all good, except non-master/non-must have combination
 742           if not node_is_mc and not must_have_file:
 743             feedback_fn("  - ERROR: file '%s' should not exist on non master"
 744                         " candidates" % file_name)
 745
 746     # checks ssh to any
 747
 748     if constants.NV_NODELIST not in node_result:
 749       bad = True
 750       feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
 751     else:
 752       if node_result[constants.NV_NODELIST]:
 753         bad = True
 754         for node in node_result[constants.NV_NODELIST]:
 755           feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
 756                           (node, node_result[constants.NV_NODELIST][node]))
 757
 758     if constants.NV_NODENETTEST not in node_result:
 759       bad = True
 760       feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
 761     else:
 762       if node_result[constants.NV_NODENETTEST]:
 763         bad = True
 764         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
 765         for node in nlist:
 766           feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
 767                           (node, node_result[constants.NV_NODENETTEST][node]))
 768
 769     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
 770     if isinstance(hyp_result, dict):
 771       for hv_name, hv_result in hyp_result.iteritems():
 772         if hv_result is not None:
 773           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
 774                       (hv_name, hv_result))
 775
 776     # check used drbd list
 777     if vg_name is not None:
 778       used_minors = node_result.get(constants.NV_DRBDLIST, [])
 779       if not isinstance(used_minors, (tuple, list)):
 780         feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
 781                     str(used_minors))
 782       else:
 783         for minor, (iname, must_exist) in drbd_map.items():
 784           if minor not in used_minors and must_exist:
 785             feedback_fn("  - ERROR: drbd minor %d of instance %s is"
 786                         " not active" % (minor, iname))
 787             bad = True
 788         for minor in used_minors:
 789           if minor not in drbd_map:
 790             feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
 791                         minor)
 792             bad = True
 793
 794     return bad
 795
 796   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
 797                       node_instance, feedback_fn, n_offline):
 798     """Verify an instance.
 799
 800     This function checks to see if the required block devices are
 801     available on the instance's node.
 802
 803     """
 804     bad = False
 805
 806     node_current = instanceconfig.primary_node
 807
 808     node_vol_should = {}
 809     instanceconfig.MapLVsByNode(node_vol_should)
 810
 811     for node in node_vol_should:
 812       if node in n_offline:
 813         # ignore missing volumes on offline nodes
 814         continue
 815       for volume in node_vol_should[node]:
 816         if node not in node_vol_is or volume not in node_vol_is[node]:
 817           feedback_fn("  - ERROR: volume %s missing on node %s" %
 818                           (volume, node))
 819           bad = True
 820
 821     if instanceconfig.admin_up:
 822       if ((node_current not in node_instance or
 823           not instance in node_instance[node_current]) and
 824           node_current not in n_offline):
 825         feedback_fn("  - ERROR: instance %s not running on node %s" %
 826                         (instance, node_current))
 827         bad = True
 828
 829     for node in node_instance:
 830       if (not node == node_current):
 831         if instance in node_instance[node]:
 832           feedback_fn("  - ERROR: instance %s should not run on node %s" %
 833                           (instance, node))
 834           bad = True
 835
 836     return bad
 837
 838   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
 839     """Verify if there are any unknown volumes in the cluster.
 840
 841     The .os, .swap and backup volumes are ignored. All other volumes are
 842     reported as unknown.
 843
 844     """
 845     bad = False
 846
 847     for node in node_vol_is:
 848       for volume in node_vol_is[node]:
 849         if node not in node_vol_should or volume not in node_vol_should[node]:
 850           feedback_fn("  - ERROR: volume %s on node %s should not exist" %
 851                       (volume, node))
 852           bad = True
 853     return bad
 854
 855   def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
 856     """Verify the list of running instances.
 857
 858     This checks what instances are running but unknown to the cluster.
 859
 860     """
 861     bad = False
 862     for node in node_instance:
 863       for runninginstance in node_instance[node]:
 864         if runninginstance not in instancelist:
 865           feedback_fn("  - ERROR: instance %s on node %s should not exist" %
 866                           (runninginstance, node))
 867           bad = True
 868     return bad
 869
 870   def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
 871     """Verify N+1 Memory Resilience.
 872
 873     Check that if one single node dies we can still start all the instances it
 874     was primary for.
 875
 876     """
 877     bad = False
 878
 879     for node, nodeinfo in node_info.iteritems():
 880       # This code checks that every node which is now listed as secondary has
 881       # enough memory to host all instances it is supposed to should a single
 882       # other node in the cluster fail.
 883       # FIXME: not ready for failover to an arbitrary node
 884       # FIXME: does not support file-backed instances
 885       # WARNING: we currently take into account down instances as well as up
 886       # ones, considering that even if they're down someone might want to start
 887       # them even in the event of a node failure.
 888       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
 889         needed_mem = 0
 890         for instance in instances:
 891           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
 892           if bep[constants.BE_AUTO_BALANCE]:
 893             needed_mem += bep[constants.BE_MEMORY]
 894         if nodeinfo['mfree'] < needed_mem:
 895           feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
 896                       " failovers should node %s fail" % (node, prinode))
 897           bad = True
 898     return bad
 899
 900   def CheckPrereq(self):
 901     """Check prerequisites.
 902
 903     Transform the list of checks we're going to skip into a set and check that
 904     all its members are valid.
 905
 906     """
 907     self.skip_set = frozenset(self.op.skip_checks)
 908     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
 909       raise errors.OpPrereqError("Invalid checks to be skipped specified")
 910
 911   def BuildHooksEnv(self):
 912     """Build hooks env.
 913
 914     Cluster-Verify hooks just rone in the post phase and their failure makes
 915     the output be logged in the verify output and the verification to fail.
 916
 917     """
 918     all_nodes = self.cfg.GetNodeList()
 919     env = {
 920       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
 921       }
 922     for node in self.cfg.GetAllNodesInfo().values():
 923       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
 924
 925     return env, [], all_nodes
 926
 927   def Exec(self, feedback_fn):
 928     """Verify integrity of cluster, performing various test on nodes.
 929
 930     """
 931     bad = False
 932     feedback_fn("* Verifying global settings")
 933     for msg in self.cfg.VerifyConfig():
 934       feedback_fn("  - ERROR: %s" % msg)
 935
 936     vg_name = self.cfg.GetVGName()
 937     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
 938     nodelist = utils.NiceSort(self.cfg.GetNodeList())
 939     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
 940     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
 941     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
 942                         for iname in instancelist)
 943     i_non_redundant = [] # Non redundant instances
 944     i_non_a_balanced = [] # Non auto-balanced instances
 945     n_offline = [] # List of offline nodes
 946     n_drained = [] # List of nodes being drained
 947     node_volume = {}
 948     node_instance = {}
 949     node_info = {}
 950     instance_cfg = {}
 951
 952     # FIXME: verify OS list
 953     # do local checksums
 954     master_files = [constants.CLUSTER_CONF_FILE]
 955
 956     file_names = ssconf.SimpleStore().GetFileList()
 957     file_names.append(constants.SSL_CERT_FILE)
 958     file_names.append(constants.RAPI_CERT_FILE)
 959     file_names.extend(master_files)
 960
 961     local_checksums = utils.FingerprintFiles(file_names)
 962
 963     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
 964     node_verify_param = {
 965       constants.NV_FILELIST: file_names,
 966       constants.NV_NODELIST: [node.name for node in nodeinfo
 967                               if not node.offline],
 968       constants.NV_HYPERVISOR: hypervisors,
 969       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
 970                                   node.secondary_ip) for node in nodeinfo
 971                                  if not node.offline],
 972       constants.NV_INSTANCELIST: hypervisors,
 973       constants.NV_VERSION: None,
 974       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
 975       }
 976     if vg_name is not None:
 977       node_verify_param[constants.NV_VGLIST] = None
 978       node_verify_param[constants.NV_LVLIST] = vg_name
 979       node_verify_param[constants.NV_DRBDLIST] = None
 980     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
 981                                            self.cfg.GetClusterName())
 982
 983     cluster = self.cfg.GetClusterInfo()
 984     master_node = self.cfg.GetMasterNode()
 985     all_drbd_map = self.cfg.ComputeDRBDMap()
 986
 987     for node_i in nodeinfo:
 988       node = node_i.name
 989       nresult = all_nvinfo[node].data
 990
 991       if node_i.offline:
 992         feedback_fn("* Skipping offline node %s" % (node,))
 993         n_offline.append(node)
 994         continue
 995
 996       if node == master_node:
 997         ntype = "master"
 998       elif node_i.master_candidate:
 999         ntype = "master candidate"
1000       elif node_i.drained:
1001         ntype = "drained"
1002         n_drained.append(node)
1003       else:
1004         ntype = "regular"
1005       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1006
1007       if all_nvinfo[node].failed or not isinstance(nresult, dict):
1008         feedback_fn("  - ERROR: connection to %s failed" % (node,))
1009         bad = True
1010         continue
1011
1012       node_drbd = {}
1013       for minor, instance in all_drbd_map[node].items():
1014         if instance not in instanceinfo:
1015           feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1016                       instance)
1017           # ghost instance should not be running, but otherwise we
1018           # don't give double warnings (both ghost instance and
1019           # unallocated minor in use)
1020           node_drbd[minor] = (instance, False)
1021         else:
1022           instance = instanceinfo[instance]
1023           node_drbd[minor] = (instance.name, instance.admin_up)
1024       result = self._VerifyNode(node_i, file_names, local_checksums,
1025                                 nresult, feedback_fn, master_files,
1026                                 node_drbd, vg_name)
1027       bad = bad or result
1028
1029       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1030       if vg_name is None:
1031         node_volume[node] = {}
1032       elif isinstance(lvdata, basestring):
1033         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1034                     (node, utils.SafeEncode(lvdata)))
1035         bad = True
1036         node_volume[node] = {}
1037       elif not isinstance(lvdata, dict):
1038         feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1039         bad = True
1040         continue
1041       else:
1042         node_volume[node] = lvdata
1043
1044       # node_instance
1045       idata = nresult.get(constants.NV_INSTANCELIST, None)
1046       if not isinstance(idata, list):
1047         feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1048                     (node,))
1049         bad = True
1050         continue
1051
1052       node_instance[node] = idata
1053
1054       # node_info
1055       nodeinfo = nresult.get(constants.NV_HVINFO, None)
1056       if not isinstance(nodeinfo, dict):
1057         feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1058         bad = True
1059         continue
1060
1061       try:
1062         node_info[node] = {
1063           "mfree": int(nodeinfo['memory_free']),
1064           "pinst": [],
1065           "sinst": [],
1066           # dictionary holding all instances this node is secondary for,
1067           # grouped by their primary node. Each key is a cluster node, and each
1068           # value is a list of instances which have the key as primary and the
1069           # current node as secondary.  this is handy to calculate N+1 memory
1070           # availability if you can only failover from a primary to its
1071           # secondary.
1072           "sinst-by-pnode": {},
1073         }
1074         # FIXME: devise a free space model for file based instances as well
1075         if vg_name is not None:
1076           if (constants.NV_VGLIST not in nresult or
1077               vg_name not in nresult[constants.NV_VGLIST]):
1078             feedback_fn("  - ERROR: node %s didn't return data for the"
1079                         " volume group '%s' - it is either missing or broken" %
1080                         (node, vg_name))
1081             bad = True
1082             continue
1083           node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1084       except (ValueError, KeyError):
1085         feedback_fn("  - ERROR: invalid nodeinfo value returned"
1086                     " from node %s" % (node,))
1087         bad = True
1088         continue
1089
1090     node_vol_should = {}
1091
1092     for instance in instancelist:
1093       feedback_fn("* Verifying instance %s" % instance)
1094       inst_config = instanceinfo[instance]
1095       result =  self._VerifyInstance(instance, inst_config, node_volume,
1096                                      node_instance, feedback_fn, n_offline)
1097       bad = bad or result
1098       inst_nodes_offline = []
1099
1100       inst_config.MapLVsByNode(node_vol_should)
1101
1102       instance_cfg[instance] = inst_config
1103
1104       pnode = inst_config.primary_node
1105       if pnode in node_info:
1106         node_info[pnode]['pinst'].append(instance)
1107       elif pnode not in n_offline:
1108         feedback_fn("  - ERROR: instance %s, connection to primary node"
1109                     " %s failed" % (instance, pnode))
1110         bad = True
1111
1112       if pnode in n_offline:
1113         inst_nodes_offline.append(pnode)
1114
1115       # If the instance is non-redundant we cannot survive losing its primary
1116       # node, so we are not N+1 compliant. On the other hand we have no disk
1117       # templates with more than one secondary so that situation is not well
1118       # supported either.
1119       # FIXME: does not support file-backed instances
1120       if len(inst_config.secondary_nodes) == 0:
1121         i_non_redundant.append(instance)
1122       elif len(inst_config.secondary_nodes) > 1:
1123         feedback_fn("  - WARNING: multiple secondaries for instance %s"
1124                     % instance)
1125
1126       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1127         i_non_a_balanced.append(instance)
1128
1129       for snode in inst_config.secondary_nodes:
1130         if snode in node_info:
1131           node_info[snode]['sinst'].append(instance)
1132           if pnode not in node_info[snode]['sinst-by-pnode']:
1133             node_info[snode]['sinst-by-pnode'][pnode] = []
1134           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1135         elif snode not in n_offline:
1136           feedback_fn("  - ERROR: instance %s, connection to secondary node"
1137                       " %s failed" % (instance, snode))
1138           bad = True
1139         if snode in n_offline:
1140           inst_nodes_offline.append(snode)
1141
1142       if inst_nodes_offline:
1143         # warn that the instance lives on offline nodes, and set bad=True
1144         feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1145                     ", ".join(inst_nodes_offline))
1146         bad = True
1147
1148     feedback_fn("* Verifying orphan volumes")
1149     result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1150                                        feedback_fn)
1151     bad = bad or result
1152
1153     feedback_fn("* Verifying remaining instances")
1154     result = self._VerifyOrphanInstances(instancelist, node_instance,
1155                                          feedback_fn)
1156     bad = bad or result
1157
1158     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1159       feedback_fn("* Verifying N+1 Memory redundancy")
1160       result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1161       bad = bad or result
1162
1163     feedback_fn("* Other Notes")
1164     if i_non_redundant:
1165       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1166                   % len(i_non_redundant))
1167
1168     if i_non_a_balanced:
1169       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1170                   % len(i_non_a_balanced))
1171
1172     if n_offline:
1173       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1174
1175     if n_drained:
1176       feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1177
1178     return not bad
1179
1180   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1181     """Analize the post-hooks' result
1182
1183     This method analyses the hook result, handles it, and sends some
1184     nicely-formatted feedback back to the user.
1185
1186     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1187         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1188     @param hooks_results: the results of the multi-node hooks rpc call
1189     @param feedback_fn: function used send feedback back to the caller
1190     @param lu_result: previous Exec result
1191     @return: the new Exec result, based on the previous result
1192         and hook results
1193
1194     """
1195     # We only really run POST phase hooks, and are only interested in
1196     # their results
1197     if phase == constants.HOOKS_PHASE_POST:
1198       # Used to change hooks' output to proper indentation
1199       indent_re = re.compile('^', re.M)
1200       feedback_fn("* Hooks Results")
1201       if not hooks_results:
1202         feedback_fn("  - ERROR: general communication failure")
1203         lu_result = 1
1204       else:
1205         for node_name in hooks_results:
1206           show_node_header = True
1207           res = hooks_results[node_name]
1208           if res.failed or res.data is False or not isinstance(res.data, list):
1209             if res.offline:
1210               # no need to warn or set fail return value
1211               continue
1212             feedback_fn("    Communication failure in hooks execution")
1213             lu_result = 1
1214             continue
1215           for script, hkr, output in res.data:
1216             if hkr == constants.HKR_FAIL:
1217               # The node header is only shown once, if there are
1218               # failing hooks on that node
1219               if show_node_header:
1220                 feedback_fn("  Node %s:" % node_name)
1221                 show_node_header = False
1222               feedback_fn("    ERROR: Script %s failed, output:" % script)
1223               output = indent_re.sub('      ', output)
1224               feedback_fn("%s" % output)
1225               lu_result = 1
1226
1227       return lu_result
1228
1229
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, shared at every level.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network-mirrored disk
    template are looked at.

    @param feedback_fn: unused by this method
    @return: tuple of four elements:
        - list of nodes which returned data that is neither an error
          string nor a dict
        - dict of node name to the error string returned while
          enumerating its volumes
        - list of names of instances having at least one volume whose
          third status field (bound to lv_online here) is false
        - dict of instance name to list of (node, volume) pairs which
          were expected but not returned by the node

    """
    # 'result' and the four names refer to the very same objects, so
    # filling the individual containers below also fills the return value
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      lvs = node_lvs[node]
      if lvs.failed:
        if not lvs.offline:
          self.LogWarning("Connection to node %s failed: %s" %
                          (node, lvs.data))
        continue
      lvs = lvs.data
      if isinstance(lvs, basestring):
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
        res_nlvm[node] = lvs
        continue
      elif not isinstance(lvs, dict):
        logging.warning("Connection to node %s failed or invalid data"
                        " returned", node)
        res_nodes.append(node)
        continue

      # only the third field of each volume's status tuple is needed here
      for lv_name, (_, _, lv_online) in lvs.iteritems():
        # volumes which were found are removed from the expected set
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
1314
1315
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks are run on the master node only, in both phases.

    """
    master = self.cfg.GetMasterNode()
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env, [master], [master]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and the opcode's name replaced by the resolved
    one; the resolved IP is remembered in self.ip.

    @raise errors.OpPrereqError: if neither the name nor the IP would
        change, or if the new IP already answers on the node daemon port

    """
    resolved = utils.HostInfo(self.op.name)
    self.ip = resolved.ip

    name_changed = resolved.name != self.cfg.GetClusterName()
    ip_changed = resolved.ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if ip_changed and utils.TcpPing(resolved.ip,
                                    constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 resolved.ip)

    self.op.name = resolved.name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated and the latter is uploaded to all the other nodes;
    the master role is started again even if one of these steps fails.

    @raise errors.OpExecError: if the master role cannot be stopped

    """
    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    if stop_result.failed or not stop_result.data:
      raise errors.OpExecError("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = self.op.name
      cluster.master_ip = self.ip
      self.cfg.Update(cluster)

      # update the known hosts file
      known_hosts = constants.SSH_KNOWN_HOSTS_FILE
      ssh.WriteKnownHostsFile(self.cfg, known_hosts)

      # the master already has the new file, only the others need it
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)

      upload_results = self.rpc.call_upload_file(targets, known_hosts)
      for to_node, to_result in upload_results.iteritems():
        if to_result.failed or not to_result.data:
          logging.error("Copy of file %s to node %s failed",
                        constants.SSH_KNOWN_HOSTS_FILE, to_node)

    finally:
      start_result = self.rpc.call_node_start_master(master, False)
      if start_result.failed or not start_result.data:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually.")
1394
1395
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is of the LD_LV type.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: True if the disk itself or one of its (recursively checked)
      children has the LD_LV dev_type, False otherwise

  """
  # an empty or unset children attribute simply means nothing to descend into
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1410
1411
1412 class LUSetClusterParams(LogicalUnit):
1413   """Change the parameters of the cluster.
1414
1415   """
1416   HPATH = "cluster-modify"
1417   HTYPE = constants.HTYPE_CLUSTER
1418   _OP_REQP = []
1419   REQ_BGL = False
1420
1421   def CheckParameters(self):
1422     """Check parameters
1423
1424     """
1425     if not hasattr(self.op, "candidate_pool_size"):
1426       self.op.candidate_pool_size = None
1427     if self.op.candidate_pool_size is not None:
1428       try:
1429         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1430       except ValueError, err:
1431         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1432                                    str(err))
1433       if self.op.candidate_pool_size < 1:
1434         raise errors.OpPrereqError("At least one master candidate needed")
1435
1436   def ExpandNames(self):
1437     # FIXME: in the future maybe other cluster params won't require checking on
1438     # all nodes to be modified.
1439     self.needed_locks = {
1440       locking.LEVEL_NODE: locking.ALL_SET,
1441     }
1442     self.share_locks[locking.LEVEL_NODE] = 1
1443
1444   def BuildHooksEnv(self):
1445     """Build hooks env.
1446
1447     """
1448     env = {
1449       "OP_TARGET": self.cfg.GetClusterName(),
1450       "NEW_VG_NAME": self.op.vg_name,
1451       }
1452     mn = self.cfg.GetMasterNode()
1453     return env, [mn], [mn]
1454
1455   def CheckPrereq(self):
1456     """Check prerequisites.
1457
1458     This checks whether the given params don't conflict and
1459     if the given volume group is valid.
1460
1461     """
1462     if self.op.vg_name is not None and not self.op.vg_name:
1463       instances = self.cfg.GetAllInstancesInfo().values()
1464       for inst in instances:
1465         for disk in inst.disks:
1466           if _RecursiveCheckIfLVMBased(disk):
1467             raise errors.OpPrereqError("Cannot disable lvm storage while"
1468                                        " lvm-based instances exist")
1469
1470     node_list = self.acquired_locks[locking.LEVEL_NODE]
1471
1472     # if vg_name not None, checks given volume group on all nodes
1473     if self.op.vg_name:
1474       vglist = self.rpc.call_vg_list(node_list)
1475       for node in node_list:
1476         if vglist[node].failed:
1477           # ignoring down node
1478           self.LogWarning("Node %s unreachable/error, ignoring" % node)
1479           continue
1480         vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
1481                                               self.op.vg_name,
1482                                               constants.MIN_VG_SIZE)
1483         if vgstatus:
1484           raise errors.OpPrereqError("Error on node '%s': %s" %
1485                                      (node, vgstatus))
1486
1487     self.cluster = cluster = self.cfg.GetClusterInfo()
1488     # validate beparams changes
1489     if self.op.beparams:
1490       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1491       self.new_beparams = cluster.FillDict(
1492         cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
1493
1494     # hypervisor list/parameters
1495     self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
1496     if self.op.hvparams:
1497       if not isinstance(self.op.hvparams, dict):
1498         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1499       for hv_name, hv_dict in self.op.hvparams.items():
1500         if hv_name not in self.new_hvparams:
1501           self.new_hvparams[hv_name] = hv_dict
1502         else:
1503           self.new_hvparams[hv_name].update(hv_dict)
1504
1505     if self.op.enabled_hypervisors is not None:
1506       self.hv_list = self.op.enabled_hypervisors
1507     else:
1508       self.hv_list = cluster.enabled_hypervisors
1509
1510     if self.op.hvparams or self.op.enabled_hypervisors is not None:
1511       # either the enabled list has changed, or the parameters have, validate
1512       for hv_name, hv_params in self.new_hvparams.items():
1513         if ((self.op.hvparams and hv_name in self.op.hvparams) or
1514             (self.op.enabled_hypervisors and
1515              hv_name in self.op.enabled_hypervisors)):
1516           # either this is a new hypervisor, or its parameters have changed
1517           hv_class = hypervisor.GetHypervisor(hv_name)
1518           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1519           hv_class.CheckParameterSyntax(hv_params)
1520           _CheckHVParams(self, node_list, hv_name, hv_params)
1521
1522   def Exec(self, feedback_fn):
1523     """Change the parameters of the cluster.
1524
1525     """
1526     if self.op.vg_name is not None:
1527       if self.op.vg_name != self.cfg.GetVGName():
1528         self.cfg.SetVGName(self.op.vg_name)
1529       else:
1530         feedback_fn("Cluster LVM configuration already in desired"
1531                     " state, not changing")
1532     if self.op.hvparams:
1533       self.cluster.hvparams = self.new_hvparams
1534     if self.op.enabled_hypervisors is not None:
1535       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1536     if self.op.beparams:
1537       self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
1538     if self.op.candidate_pool_size is not None:
1539       self.cluster.candidate_pool_size = self.op.candidate_pool_size
1540
1541     self.cfg.Update(self.cluster)
1542
1543     # we want to update nodes after the cluster so that if any errors
1544     # happen, we have recorded and saved the cluster info
1545     if self.op.candidate_pool_size is not None:
1546       _AdjustCandidatePool(self)
1547
1548
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This LU has no parameters and no prerequisites.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, in shared mode.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is passed, unmodified, to the configuration's
    Update method.
    NOTE(review): presumably Update is what pushes the configuration out
    to the nodes - confirm in the config module.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster)
1574
1575
1576 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1577   """Sleep and poll for an instance's disk to sync.
1578
1579   """
1580   if not instance.disks:
1581     return True
1582
1583   if not oneshot:
1584     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1585
1586   node = instance.primary_node
1587
1588   for dev in instance.disks:
1589     lu.cfg.SetDiskID(dev, node)
1590
1591   retries = 0
1592   while True:
1593     max_time = 0
1594     done = True
1595     cumul_degraded = False
1596     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1597     if rstats.failed or not rstats.data:
1598       lu.LogWarning("Can't get any data from node %s", node)
1599       retries += 1
1600       if retries >= 10:
1601         raise errors.RemoteError("Can't contact node %s for mirror data,"
1602                                  " aborting." % node)
1603       time.sleep(6)
1604       continue
1605     rstats = rstats.data
1606     retries = 0
1607     for i, mstat in enumerate(rstats):
1608       if mstat is None:
1609         lu.LogWarning("Can't compute data for node %s/%s",
1610                            node, instance.disks[i].iv_name)
1611         continue
1612       # we ignore the ldisk parameter
1613       perc_done, est_time, is_degraded, _ = mstat
1614       cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1615       if perc_done is not None:
1616         done = False
1617         if est_time is not None:
1618           rem_time = "%d estimated seconds remaining" % est_time
1619           max_time = est_time
1620         else:
1621           rem_time = "no time estimate"
1622         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1623                         (instance.disks[i].iv_name, perc_done, rem_time))
1624     if done or oneshot:
1625       break
1626
1627     time.sleep(min(60, max_time))
1628
1629   if done:
1630     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1631   return not cumul_degraded
1632
1633
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that a disk and the devices below it are not degraded.

  The device itself is only queried if C{on_primary} is true or if the
  device reports that it is assembled on secondary nodes; afterwards
  all its children are checked recursively. Note that the recursive
  checks always use the default (non-ldisk) test.

  @param lu: the logical unit on whose behalf we execute
  @param dev: the disk object to check
  @param node: the node on which the disk is looked up
  @param on_primary: if true, the status of the device is always queried
  @param ldisk: if True, test the ldisk value (local storage status)
      instead of the is_degraded one (overall non-ok status)
  @rtype: boolean
  @return: False if the device or one of its children is degraded or
      cannot be found, True otherwise

  """
  lu.cfg.SetDiskID(dev, node)

  # position of the flag we test inside the blockdev_find payload
  status_idx = 5
  if ldisk:
    status_idx = 6

  consistent = True
  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    fail_msg = found.RemoteFailMsg()
    if fail_msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, fail_msg)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    else:
      consistent = not found.payload[status_idx]

  if dev.children:
    for child in dev.children:
      # once a problem has been found the remaining children are not queried
      if not consistent:
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
1665
1666
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    """Validate the opcode and request all node locks.

    @raise errors.OpPrereqError: if a non-empty list of names was given

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # all the nodes are locked, but only in shared mode
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    Results of nodes which failed or returned no data are skipped.

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and list of OS objects as values, eg::

          {"debian-etch": {"node1": [<object>,...],
                           "node2": [<object>,]}
          }

    """
    by_os = {}
    for node_name, node_result in rlist.iteritems():
      if node_result.failed or not node_result.data:
        continue
      for os_obj in node_result.data:
        per_node = by_os.get(os_obj.name)
        if per_node is None:
          # first time we see this OS: start with an empty list for
          # every node in node_list
          per_node = dict([(nname, []) for nname in node_list])
          by_os[os_obj.name] = per_node
        per_node[node_name].append(os_obj)
    return by_os

  @staticmethod
  def _FieldValue(field, os_name, os_data):
    """Compute the value of a single output field for one OS.

    @param field: the name of the requested field
    @param os_name: the name of the OS
    @param os_data: map of node name to the list of OS objects found there
    @raise errors.ParameterError: for an unknown field

    """
    if field == "name":
      return os_name
    if field == "valid":
      return utils.all([osl and osl[0] for osl in os_data.values()])
    if field == "node_status":
      return dict([(node_name, [(v.status, v.path) for v in nos_list])
                   for node_name, nos_list in os_data.iteritems()])
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    Only the locked nodes which are also online are queried.

    @return: a list of rows (one per OS), each holding the values of
        the requested output fields
    @raise errors.OpExecError: if the OS list could not be gathered

    """
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
                   if node in locked_nodes]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")

    fields = self.op.output_fields
    output = []
    for os_name, os_data in self._DiagnoseByOS(valid_nodes,
                                               node_data).iteritems():
      output.append([self._FieldValue(field, os_name, os_data)
                     for field in fields])

    return output
1753
1754
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: the hooks environment and the list of all nodes except the
        one being removed, used for both the pre and the post phase

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    On success the expanded node name is stored in C{self.op.node_name}
    and the node object in C{self.node}.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      # use the call form of raise, like everywhere else in this module
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is first removed via the context, then asked to leave the
    cluster, and finally the master candidate pool is adjusted.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    self.rpc.call_node_leave_cluster(node.name)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)
1822
1823
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "name", "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "serial_no",
    "master_candidate",
    "master",
    "offline",
    "drained",
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (in shared mode) are only requested when at least one
    non-static field is selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    wanted = locking.ALL_SET
    if self.op.names:
      wanted = _GetWantedNodes(self, self.op.names)
    self.wanted = wanted

    # the nodes have to be contacted as soon as a non-static field is selected
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is done here: a non-empty node list has already been
    validated by _GetWantedNodes, and an empty one needs no validation.

    """
    pass

  def _GetLiveData(self, nodenames):
    """Gather the dynamic data of the given nodes.

    @param nodenames: the names of the nodes to query
    @rtype: dict
    @return: map of node name to a dict of dynamic field values; the
        inner dict is empty for nodes which failed to answer or when no
        node query is needed at all

    """
    if not self.do_node_query:
      return dict.fromkeys(nodenames, {})

    # output field name and the node info key holding its (integer) value
    int_fields = (
      ("mtotal", 'memory_total'),
      ("mnode", 'memory_dom0'),
      ("mfree", 'memory_free'),
      ("dtotal", 'vg_size'),
      ("dfree", 'vg_free'),
      ("ctotal", 'cpu_total'),
      ("cnodes", 'cpu_nodes'),
      ("csockets", 'cpu_sockets'),
      )

    live_data = {}
    node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        self.cfg.GetHypervisorType())
    for name in nodenames:
      reply = node_data[name]
      if reply.failed or not reply.data:
        live_data[name] = {}
        continue
      raw = reply.data
      values = {"bootid": raw.get('bootid', None)}
      for field, key in int_fields:
        values[field] = utils.TryConvert(int, raw.get(key, None))
      live_data[name] = values
    return live_data

  def _MapInstancesToNodes(self, nodenames):
    """Compute the primary and secondary instances of the given nodes.

    The configuration is only read if an instance-related field was
    selected; otherwise all the sets are left empty.

    @param nodenames: the names of the nodes we are interested in
    @rtype: tuple
    @return: two dicts (for primary and secondary instances), each
        mapping a node name to a set of instance names

    """
    node_to_primary = {}
    node_to_secondary = {}
    for name in nodenames:
      node_to_primary[name] = set()
      node_to_secondary[name] = set()

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if not inst_fields & frozenset(self.op.output_fields):
      return node_to_primary, node_to_secondary

    for instance_name in self.cfg.GetInstanceList():
      inst = self.cfg.GetInstanceInfo(instance_name)
      if inst.primary_node in node_to_primary:
        node_to_primary[inst.primary_node].add(inst.name)
      for secnode in inst.secondary_nodes:
        if secnode in node_to_secondary:
          node_to_secondary[secnode].add(inst.name)

    return node_to_primary, node_to_secondary

  def _FieldValue(self, node, field, live_data, node_to_primary,
                  node_to_secondary, master_node):
    """Compute the value of a single output field for one node.

    @param node: the node object
    @param field: the name of the requested field
    @param live_data: map of node name to dynamic field values
    @param node_to_primary: map of node name to primary instance names
    @param node_to_secondary: map of node name to secondary instance names
    @param master_node: the name of the master node
    @raise errors.ParameterError: for an unknown field

    """
    if field == "name":
      return node.name
    if field == "pinst_list":
      return list(node_to_primary[node.name])
    if field == "sinst_list":
      return list(node_to_secondary[node.name])
    if field == "pinst_cnt":
      return len(node_to_primary[node.name])
    if field == "sinst_cnt":
      return len(node_to_secondary[node.name])
    if field == "pip":
      return node.primary_ip
    if field == "sip":
      return node.secondary_ip
    if field == "tags":
      return list(node.GetTags())
    if field == "serial_no":
      return node.serial_no
    if field == "master_candidate":
      return node.master_candidate
    if field == "master":
      return node.name == master_node
    if field == "offline":
      return node.offline
    if field == "drained":
      return node.drained
    if self._FIELDS_DYNAMIC.Matches(field):
      return live_data[node.name].get(field, None)
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @return: a list of rows (one per node, in NiceSort order of the node
        names), each holding the values of the requested output fields
    @raise errors.OpExecError: if, when running without locks, some of
        the wanted nodes are no longer in the configuration

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      names = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      names = all_info.keys()
    else:
      names = self.wanted
      missing = set(names).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    names = utils.NiceSort(names)
    nodelist = [all_info[name] for name in names]

    # data gathering
    live_data = self._GetLiveData(names)
    node_to_primary, node_to_secondary = self._MapInstancesToNodes(names)
    master_node = self.cfg.GetMasterNode()

    # output computation
    output = []
    for node in nodelist:
      output.append([self._FieldValue(node, field, live_data,
                                      node_to_primary, node_to_secondary,
                                      master_node)
                     for field in self.op.output_fields])

    return output
1980
1981
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All the nodes are locked if no node names were given.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks = {locking.LEVEL_NODE: node_locks}

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the list of nodes we hold locks on.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  @staticmethod
  def _FieldValue(field, node, vol, instances, lv_by_node):
    """Compute the value of a single output field for one volume.

    @param field: the name of the requested field
    @param node: the name of the node holding the volume
    @param vol: the volume data, as returned by the node
    @param instances: the list of all instance objects
    @param lv_by_node: map of instance object to its MapLVsByNode() result
    @raise errors.ParameterError: for an unknown field

    """
    if field == "node":
      return node
    if field == "phys":
      return vol['dev']
    if field == "vg":
      return vol['vg']
    if field == "name":
      return vol['name']
    if field == "size":
      return int(float(vol['size']))
    if field == "instance":
      # the first instance having this volume on this node owns it
      for inst in instances:
        inst_lvs = lv_by_node[inst]
        if node in inst_lvs and vol['name'] in inst_lvs[node]:
          return inst.name
      return '-'
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Nodes which failed to answer or returned no data are skipped.

    @return: a list of rows (one per volume, sorted by device within
        each node), each holding the requested output fields as strings

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    instances = []
    for iname in self.cfg.GetInstanceList():
      instances.append(self.cfg.GetInstanceInfo(iname))

    lv_by_node = {}
    for inst in instances:
      lv_by_node[inst] = inst.MapLVsByNode()

    output = []
    for node in nodenames:
      if node not in volumes:
        continue
      reply = volumes[node]
      if reply.failed or not reply.data:
        continue

      for vol in sorted(reply.data, key=lambda entry: entry['dev']):
        output.append([str(self._FieldValue(field, node, vol, instances,
                                            lv_by_node))
                       for field in self.op.output_fields])

    return output
2061
2062
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add,
       that it is in the config with the same IP addresses)
     - it is resolvable
     - its IP addresses don't conflict with the ones of other nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    On success the primary and secondary IPs are stored in the opcode
    and the new node object in C{self.new_node}.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # a missing secondary IP means the node is single-homed
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    # the new node becomes a master candidate if the pool is not full
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    mc_now, _ = self.cfg.GetMasterCandidateStats()
    master_candidate = mc_now < cp_size

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=master_candidate,
                                 offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are: protocol version check, transfer of the ssh keys,
    update of the local hosts file, verification of the secondary IP
    and of the ssh/hostname setup (from the master), distribution of
    the hosts/known_hosts (and, if needed, VNC password) files and
    finally the addition (or re-addition) of the node via the context.

    @raise errors.OpExecError: if any of the mandatory steps fails;
        failures to copy files to nodes are only logged

    """
    new_node = self.new_node
    node = new_node.name

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise()
    if result.data:
      if constants.PROTOCOL_VERSION == result.data:
        logging.info("Communication to node %s fine, sw version %s match",
                     node, result.data)
      else:
        raise errors.OpExecError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.data))
    else:
      raise errors.OpExecError("Cannot get version from the new node")

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for key_path in keyfiles:
      f = open(key_path, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                    keyarray[2],
                                    keyarray[3], keyarray[4], keyarray[5])

    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Cannot transfer ssh keys to the"
                               " new node: %s" % msg)

    # Add node to our /etc/hosts, and add key to known_hosts
    utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      if result.failed or not result.data:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      'nodelist': [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      if result[verifier].failed or not result[verifier].data:
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                 " for remote verification" % verifier)
      if result[verifier].data['nodelist']:
        for failed in result[verifier].data['nodelist']:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, result[verifier].data['nodelist'][failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
    dist_nodes = self.cfg.GetNodeList()
    if not self.op.readd:
      dist_nodes.append(node)
    if myself.name in dist_nodes:
      dist_nodes.remove(myself.name)

    logging.debug("Copying hosts and known_hosts to all nodes")
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
      result = self.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.iteritems():
        if to_result.failed or not to_result.data:
          logging.error("Copy of file %s to node %s failed", fname, to_node)

    to_copy = []
    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
      to_copy.append(constants.VNC_PASSWORD_FILE)

    for fname in to_copy:
      result = self.rpc.call_upload_file([node], fname)
      # test the payload of the result (not the result object itself),
      # consistently with the hosts/known_hosts upload above
      if result[node].failed or not result[node].data:
        logging.error("Could not copy file %s to node %s", fname, node)

    if self.op.readd:
      self.context.ReaddNode(new_node)
    else:
      self.context.AddNode(new_node)
2274
2275
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested modifications.

    @raise errors.OpPrereqError: if the node name is invalid, if no
        modification was passed or if more than one flag is set to True

    """
    expanded_name = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded_name

    for flag_name in ('master_candidate', 'offline', 'drained'):
      _CheckBooleanOpField(self.op, flag_name)

    requested = [self.op.offline, self.op.master_candidate, self.op.drained]
    if requested.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if requested.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    """Request the lock of the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the modified node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    hook_nodes = [self.cfg.GetMasterNode(), self.op.node_name]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that a demotion from master candidate is allowed (not
    on the master node, and only with force if it would leave too few
    candidates) and that an offline or drained node is not promoted to
    master candidate.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    loses_mc = (self.op.master_candidate == False or
                self.op.offline == True or
                self.op.drained == True)
    if loses_mc and node.master_candidate:
      # we will demote the node from master_candidate
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master node has to be a"
                                   " master candidate, online and not drained")
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if not self.op.force:
          raise errors.OpPrereqError(msg)
        self.LogWarning(msg)

    if self.op.master_candidate == True:
      # the node flags only stop blocking if explicitly reset in this call
      stays_offline = node.offline and not (self.op.offline == False)
      stays_drained = node.drained and not (self.op.drained == False)
      if stays_offline or stays_drained:
        raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                   " to master_candidate" % node.name)

  def Exec(self, feedback_fn):
    """Modifies a node.

    The flags are applied in the order offline, master_candidate,
    drained; setting offline or drained to True also demotes the node
    from master candidate and clears the other flag.

    @return: a list of (parameter, new value or explanation) pairs

    """
    node = self.node
    new_offline = self.op.offline
    new_mc = self.op.master_candidate
    new_drained = self.op.drained

    report = []
    mc_changed = False

    if new_offline is not None:
      node.offline = new_offline
      report.append(("offline", str(new_offline)))
      if new_offline == True:
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          report.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          report.append(("drained", "clear drained status due to offline"))

    if new_mc is not None:
      node.master_candidate = new_mc
      mc_changed = True
      report.append(("master_candidate", str(new_mc)))
      if new_mc == False:
        demote_result = self.rpc.call_node_demote_from_mc(node.name)
        fail_msg = demote_result.RemoteFailMsg()
        if fail_msg:
          self.LogWarning("Node failed to demote itself: %s" % fail_msg)

    if new_drained is not None:
      node.drained = new_drained
      report.append(("drained", str(new_drained)))
      if new_drained == True:
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          report.append(("master_candidate", "auto-demotion due to drain"))
        if node.offline:
          node.offline = False
          report.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if mc_changed:
      self.context.ReaddNode(node)

    return report
2401
2402
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this LU.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version constants, the architecture of the machine running
        this code and the main parameters of the cluster object; the
        hypervisor parameters are restricted to the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()

    enabled_hvparams = {}
    for hv_name in cluster.enabled_hypervisors:
      enabled_hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.default_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": enabled_hvparams,
      "beparams": cluster.beparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      }
2442
2443
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  # output_fields is read unconditionally in ExpandNames and Exec, so it
  # is declared as required, like in the other query LUs of this module
  _OP_REQP = ["output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")

  def ExpandNames(self):
    """Validate the selected output fields; no locks are needed.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @rtype: list
    @return: the values of the selected fields, in the order in which
        the fields were requested; "drain_flag" is a boolean telling
        whether the job queue drain file exists
    @raise errors.ParameterError: for an unknown field

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
2482
2483
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    # the node list is filled in by DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the _CheckNodeOnline check.

    """
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the disk information computed by _AssembleInstanceDisks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    all_ok, device_info = _AssembleInstanceDisks(self, self.instance)
    if all_ok:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
2520
2521
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first
  every device is assembled on every node with is_primary=False, then
  the devices living on the primary node are assembled again with
  is_primary=True.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      are only logged and won't result in an error return from the
      function (errors during the second pass always do)
  @rtype: tuple
  @return: a (disks_ok, device_info) tuple; disks_ok is False if the
      operation failed, and device_info is a list with one
      (host, instance_visible_name, node_visible_name) entry per
      instance disk, giving the mapping from node devices to instance
      devices; node_visible_name is None for a disk which was not
      successfully assembled in primary mode

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.RemoteFailMsg()
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    # Only a successful primary-mode assemble of this very disk may
    # fill in the path; otherwise we would report the payload of a
    # failed call, or a stale result left over from another iteration
    dev_path = None
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.RemoteFailMsg()
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
2589
2590
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, aborting on failure.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be started
  @param force: handed to L{_AssembleInstanceDisks} as its
      C{ignore_secondaries} argument; when it is None the '--force'
      hint is never shown
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before raising

  """
  (assembled, unused_info) = _AssembleInstanceDisks(lu, instance,
                                                    ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint only makes sense if the caller explicitly passed a false
  # (but not None) force value
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
2604
2605
class LUDeactivateInstanceDisks(NoHooksLU):
  """Logical unit shutting down the block devices of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the instance lock and an (initially empty) node lock list.

    The node locks are flagged for recalculation with
    C{constants.LOCKS_REPLACE}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At the node level, delegate to C{_LockInstancesNodes}.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance object is read from the configuration and stored
    for use by L{Exec}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks via C{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
2638
2639
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown the block devices of an instance which is not running.

  The primary node is asked for its list of instances (for the
  hypervisor of the instance); only if the instance is not in that
  list L{_ShutdownInstanceDisks} is called.

  @raise errors.OpExecError: if the primary node did not return a
      usable list, or if the instance is in that list

  """
  pnode_result = lu.rpc.call_instance_list([instance.primary_node],
                                           [instance.hypervisor])
  pnode_result = pnode_result[instance.primary_node]
  if not (not pnode_result.failed and isinstance(pnode_result.data, list)):
    raise errors.OpExecError("Can't contact node '%s'" %
                             instance.primary_node)

  if instance.name in pnode_result.data:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
2659
2660
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted for every (node, device) pair of every
  disk of the instance; a failure is logged as a warning and does not
  stop the remaining shutdowns.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param ignore_primary: if true, failures on the primary node do not
      affect the return value (they are still logged)
  @return: False if any failure that is not ignored happened, True
      otherwise

  """
  success = True
  for inst_disk in instance.disks:
    for target, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(node_disk, target)
      fail_msg = lu.rpc.call_blockdev_shutdown(target,
                                               node_disk).RemoteFailMsg()
      if not fail_msg:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    inst_disk.iv_name, target, fail_msg)
      if ignore_primary and target == instance.primary_node:
        # the caller asked us to tolerate failures on the primary
        continue
      success = False
  return success
2682
2683
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node reports enough free memory.

  The node is queried via the node info RPC call and the value found
  under the 'memory_free' key of the answer is compared with the
  requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      the reported value is not an integer

  """
  node_result = lu.rpc.call_node_info([node], lu.cfg.GetVGName(),
                                      hypervisor_name)[node]
  node_result.Raise()
  available = node_result.data.get('memory_free')
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available))
  if available < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available))
2716
2717
class LUStartupInstance(LogicalUnit):
  """Logical unit starting an instance on its primary node.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the FORCE flag plus the generic instance
    variables; the node list (master followed by all nodes of the
    instance) is returned twice.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Besides reading the instance from the configuration, this runs
    the node online check, the bridge check and the free memory check
    (using the filled backend memory parameter) for the primary node.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

    mem_reason = "starting instance %s" % inst.name
    _CheckNodeFreeMemory(self, inst.primary_node, mem_reason,
                         filled_be[constants.BE_MEMORY], inst.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration before its disks
    are started; if the start RPC call fails the disks are shut down
    again.

    @raise errors.OpExecError: if the instance could not be started

    """
    inst = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(inst.name)

    pnode = inst.primary_node

    _StartInstanceDisks(self, inst, force)

    fail_msg = self.rpc.call_instance_start(pnode, inst).RemoteFailMsg()
    if not fail_msg:
      return

    _ShutdownInstanceDisks(self, inst)
    raise errors.OpExecError("Could not start instance: %s" % fail_msg)
2781
2782
class LURebootInstance(LogicalUnit):
  """Logical unit rebooting an instance (soft, hard or full reboot).

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the reboot type, then expand and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        three known constants

    """
    known_types = [constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL]
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(known_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the two reboot options plus the generic
    instance variables; the node list (master followed by all nodes
    of the instance) is returned twice.

    """
    env = {}
    env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    env["REBOOT_TYPE"] = self.op.reboot_type
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Besides reading the instance from the configuration, this runs
    the node online check for the primary node and the bridge check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are handed to the reboot RPC call; any
    other type is done as shutdown, disk restart and start. On
    success the instance is marked up in the configuration.

    @raise errors.OpExecError: if any of the RPC calls fails

    """
    inst = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    pnode = inst.primary_node

    in_place = reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                               constants.INSTANCE_REBOOT_HARD)
    if not in_place:
      fail_msg = self.rpc.call_instance_shutdown(pnode, inst).RemoteFailMsg()
      if fail_msg:
        raise errors.OpExecError("Could not shutdown instance for"
                                 " full reboot: %s" % fail_msg)
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, ignore_secondaries)
      fail_msg = self.rpc.call_instance_start(pnode, inst).RemoteFailMsg()
      if fail_msg:
        # do not leave the disks active for an instance we couldn't start
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % fail_msg)
    else:
      for disk in inst.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, reboot_type)
      fail_msg = reboot_result.RemoteFailMsg()
      if fail_msg:
        raise errors.OpExecError("Could not reboot instance: %s" % fail_msg)

    self.cfg.MarkInstanceUp(inst.name)
2866
2867
class LUShutdownInstance(LogicalUnit):
  """Logical unit stopping an instance and deactivating its disks.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the generic instance variables; the node
    list (master followed by all nodes of the instance) is returned
    twice.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance object is read from the configuration and its
    primary node is passed through C{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first; a failure
    of the shutdown RPC call is only logged as a warning and the disks
    are shut down regardless.

    """
    inst = self.instance
    pnode = inst.primary_node
    self.cfg.MarkInstanceDown(inst.name)
    fail_msg = self.rpc.call_instance_shutdown(pnode, inst).RemoteFailMsg()
    if fail_msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % fail_msg)

    _ShutdownInstanceDisks(self, inst)
2914
2915
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the generic instance variables; the node
    list (master followed by all nodes of the instance) is returned
    twice.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked down and is not running; if the opcode carries a new OS
    type, that OS is also looked up on the primary node.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    # note that a failed RPC call is reported the same way as a
    # running instance
    if remote_info.failed or remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    # os_type is optional in the opcode; normalise a missing value to None
    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the node we actually looked up; nothing in this LU
        # requires or sets a 'pnode' attribute on the opcode, so
        # formatting self.op.pnode here would fail with an
        # AttributeError instead of the intended error
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise()
      if not isinstance(result.data, objects.OS):
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node"  % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given it is saved in the configuration
    first; then the disks are started, the OS create RPC call is made
    on the primary node and the disks are shut down again (also when
    the call fails).

    @raise errors.OpExecError: if the OS create call fails

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s: %s" %
                                 (inst.name, inst.primary_node, msg))
    finally:
      _ShutdownInstanceDisks(self, inst)
3001
3002
class LURenameInstance(LogicalUnit):
  """Logical unit renaming an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the generic instance variables plus the new
    name; the node list (master followed by all nodes of the instance)
    is returned twice.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, is marked down
    and is not running, that the (resolved) new name is not used by
    another instance and, unless the opcode has a true C{ignore_ip},
    that the IP of the new name does not answer a TCP ping.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(full_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    new_name = name_info.name
    self.op.new_name = new_name
    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if (not getattr(self.op, "ignore_ip", False) and
        utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced; for file-based instances the storage directory is
    renamed on the primary node. Finally the disks are started, the
    rename RPC call is made on the primary node (a failure there is
    only logged) and the disks are shut down again.

    @raise errors.OpExecError: if the storage directory of a
        file-based instance could not be renamed

    """
    old_inst = self.instance
    old_name = old_inst.name

    if old_inst.disk_template == constants.DT_FILE:
      old_dir = os.path.dirname(old_inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_mgr = self.context.glm
    lock_mgr.remove(locking.LEVEL_INSTANCE, old_name)
    lock_mgr.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_dir, new_dir)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" %
                                 (inst.primary_node, old_dir, new_dir))
      elif not result.data[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" % (old_dir, new_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      fail_msg = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                   old_name).RemoteFailMsg()
      if fail_msg:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, fail_msg))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)
3112
3113
class LURemoveInstance(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the instance lock and an (initially empty) node lock list.

    The node locks are flagged for recalculation with
    C{constants.LOCKS_REPLACE}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At the node level, delegate to C{_LockInstancesNodes}.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the generic instance variables; the node
    list contains only the master node and is returned twice.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance object is read from the configuration and stored
    for use by L{Exec}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and finally it
    is dropped from the configuration and its lock is scheduled for
    removal. If the opcode's C{ignore_failures} is true, shutdown and
    disk removal failures are only reported via feedback_fn.

    @raise errors.OpExecError: if the shutdown or the disk removal
        fails and failures are not ignored

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown_result = self.rpc.call_instance_shutdown(inst.primary_node, inst)
    fail_msg = shutdown_result.RemoteFailMsg()
    if fail_msg:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, fail_msg))
      feedback_fn("Warning: can't shutdown instance: %s" % fail_msg)

    logging.info("Removing block devices for instance %s", inst.name)

    if not _RemoveDisks(self, inst):
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
3182
3183
3184 class LUQueryInstances(NoHooksLU):
3185   """Logical unit for querying instances.
3186
3187   """
3188   _OP_REQP = ["output_fields", "names", "use_locking"]
3189   REQ_BGL = False
3190   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3191                                     "admin_state",
3192                                     "disk_template", "ip", "mac", "bridge",
3193                                     "sda_size", "sdb_size", "vcpus", "tags",
3194                                     "network_port", "beparams",
3195                                     r"(disk)\.(size)/([0-9]+)",
3196                                     r"(disk)\.(sizes)", "disk_usage",
3197                                     r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3198                                     r"(nic)\.(macs|ips|bridges)",
3199                                     r"(disk|nic)\.(count)",
3200                                     "serial_no", "hypervisor", "hvparams",] +
3201                                   ["hv/%s" % name
3202                                    for name in constants.HVS_PARAMETERS] +
3203                                   ["be/%s" % name
3204                                    for name in constants.BES_PARAMETERS])
3205   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3206
3207
3208   def ExpandNames(self):
3209     _CheckOutputFields(static=self._FIELDS_STATIC,
3210                        dynamic=self._FIELDS_DYNAMIC,
3211                        selected=self.op.output_fields)
3212
3213     self.needed_locks = {}
3214     self.share_locks[locking.LEVEL_INSTANCE] = 1
3215     self.share_locks[locking.LEVEL_NODE] = 1
3216
3217     if self.op.names:
3218       self.wanted = _GetWantedInstances(self, self.op.names)
3219     else:
3220       self.wanted = locking.ALL_SET
3221
3222     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3223     self.do_locking = self.do_node_query and self.op.use_locking
3224     if self.do_locking:
3225       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3226       self.needed_locks[locking.LEVEL_NODE] = []
3227       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3228
3229   def DeclareLocks(self, level):
3230     if level == locking.LEVEL_NODE and self.do_locking:
3231       self._LockInstancesNodes()
3232
3233   def CheckPrereq(self):
3234     """Check prerequisites.
3235
3236     """
3237     pass
3238
3239   def Exec(self, feedback_fn):
3240     """Computes the list of nodes and their attributes.
3241
3242     """
3243     all_info = self.cfg.GetAllInstancesInfo()
3244     if self.wanted == locking.ALL_SET:
3245       # caller didn't specify instance names, so ordering is not important
3246       if self.do_locking:
3247         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3248       else:
3249         instance_names = all_info.keys()
3250       instance_names = utils.NiceSort(instance_names)
3251     else:
3252       # caller did specify names, so we must keep the ordering
3253       if self.do_locking:
3254         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3255       else:
3256         tgt_set = all_info.keys()
3257       missing = set(self.wanted).difference(tgt_set)
3258       if missing:
3259         raise errors.OpExecError("Some instances were removed before"
3260                                  " retrieving their data: %s" % missing)
3261       instance_names = self.wanted
3262
3263     instance_list = [all_info[iname] for iname in instance_names]
3264
3265     # begin data gathering
3266
3267     nodes = frozenset([inst.primary_node for inst in instance_list])
3268     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3269
3270     bad_nodes = []
3271     off_nodes = []
3272     if self.do_node_query:
3273       live_data = {}
3274       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3275       for name in nodes:
3276         result = node_data[name]
3277         if result.offline:
3278           # offline nodes will be in both lists
3279           off_nodes.append(name)
3280         if result.failed:
3281           bad_nodes.append(name)
3282         else:
3283           if result.data:
3284             live_data.update(result.data)
3285             # else no instance is alive
3286     else:
3287       live_data = dict([(name, {}) for name in instance_names])
3288
3289     # end data gathering
3290
3291     HVPREFIX = "hv/"
3292     BEPREFIX = "be/"
3293     output = []
3294     for instance in instance_list:
3295       iout = []
3296       i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3297       i_be = self.cfg.GetClusterInfo().FillBE(instance)
3298       for field in self.op.output_fields:
3299         st_match = self._FIELDS_STATIC.Matches(field)
3300         if field == "name":
3301           val = instance.name
3302         elif field == "os":
3303           val = instance.os
3304         elif field == "pnode":
3305           val = instance.primary_node
3306         elif field == "snodes":
3307           val = list(instance.secondary_nodes)
3308         elif field == "admin_state":
3309           val = instance.admin_up
3310         elif field == "oper_state":
3311           if instance.primary_node in bad_nodes:
3312             val = None
3313           else:
3314             val = bool(live_data.get(instance.name))
3315         elif field == "status":
3316           if instance.primary_node in off_nodes:
3317             val = "ERROR_nodeoffline"
3318           elif instance.primary_node in bad_nodes:
3319             val = "ERROR_nodedown"
3320           else:
3321             running = bool(live_data.get(instance.name))
3322             if running:
3323               if instance.admin_up:
3324                 val = "running"
3325               else:
3326                 val = "ERROR_up"
3327             else:
3328               if instance.admin_up:
3329                 val = "ERROR_down"
3330               else:
3331                 val = "ADMIN_down"
3332         elif field == "oper_ram":
3333           if instance.primary_node in bad_nodes:
3334             val = None
3335           elif instance.name in live_data:
3336             val = live_data[instance.name].get("memory", "?")
3337           else:
3338             val = "-"
3339         elif field == "disk_template":
3340           val = instance.disk_template
3341         elif field == "ip":
3342           val = instance.nics[0].ip
3343         elif field == "bridge":
3344           val = instance.nics[0].bridge
3345         elif field == "mac":
3346           val = instance.nics[0].mac
3347         elif field == "sda_size" or field == "sdb_size":
3348           idx = ord(field[2]) - ord('a')
3349           try:
3350             val = instance.FindDisk(idx).size
3351           except errors.OpPrereqError:
3352             val = None
3353         elif field == "disk_usage": # total disk usage per node
3354           disk_sizes = [{'size': disk.size} for disk in instance.disks]
3355           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3356         elif field == "tags":
3357           val = list(instance.GetTags())
3358         elif field == "serial_no":
3359           val = instance.serial_no
3360         elif field == "network_port":
3361           val = instance.network_port
3362         elif field == "hypervisor":
3363           val = instance.hypervisor
3364         elif field == "hvparams":
3365           val = i_hv
3366         elif (field.startswith(HVPREFIX) and
3367               field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3368           val = i_hv.get(field[len(HVPREFIX):], None)
3369         elif field == "beparams":
3370           val = i_be
3371         elif (field.startswith(BEPREFIX) and
3372               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3373           val = i_be.get(field[len(BEPREFIX):], None)
3374         elif st_match and st_match.groups():
3375           # matches a variable list
3376           st_groups = st_match.groups()
3377           if st_groups and st_groups[0] == "disk":
3378             if st_groups[1] == "count":
3379               val = len(instance.disks)
3380             elif st_groups[1] == "sizes":
3381               val = [disk.size for disk in instance.disks]
3382             elif st_groups[1] == "size":
3383               try:
3384                 val = instance.FindDisk(st_groups[2]).size
3385               except errors.OpPrereqError:
3386                 val = None
3387             else:
3388               assert False, "Unhandled disk parameter"
3389           elif st_groups[0] == "nic":
3390             if st_groups[1] == "count":
3391               val = len(instance.nics)
3392             elif st_groups[1] == "macs":
3393               val = [nic.mac for nic in instance.nics]
3394             elif st_groups[1] == "ips":
3395               val = [nic.ip for nic in instance.nics]
3396             elif st_groups[1] == "bridges":
3397               val = [nic.bridge for nic in instance.nics]
3398             else:
3399               # index-based item
3400               nic_idx = int(st_groups[2])
3401               if nic_idx >= len(instance.nics):
3402                 val = None
3403               else:
3404                 if st_groups[1] == "mac":
3405                   val = instance.nics[nic_idx].mac
3406                 elif st_groups[1] == "ip":
3407                   val = instance.nics[nic_idx].ip
3408                 elif st_groups[1] == "bridge":
3409                   val = instance.nics[nic_idx].bridge
3410                 else:
3411                   assert False, "Unhandled NIC parameter"
3412           else:
3413             assert False, "Unhandled variable parameter"
3414         else:
3415           raise errors.ParameterError(field)
3416         iout.append(val)
3417       output.append(iout)
3418
3419     return output
3420
3421
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its first secondary node.

  The instance is shut down on its current primary node, the first
  secondary node is recorded as the new primary in the configuration
  and, if the instance is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare an empty node lock list.

    The node level starts with no locks and is flagged as
    C{LOCKS_REPLACE}; L{DeclareLocks} fills it in afterwards.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    @param level: the locking level currently being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both the pre and the post phase) is made
    of the master node followed by the instance's secondary nodes.

    @return: tuple of (environment dict, pre node list, post node list)

    """
    hook_env = {}
    hook_env["IGNORE_CONSISTENCY"] = self.op.ignore_consistency
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.secondary_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This stores the instance object in C{self.instance} and verifies
    that:
      - the disk template is one of the network mirrored ones
      - the instance has at least one secondary node
      - the first secondary node is online, not drained and has enough
        free memory for the instance
      - all bridges used by the instance's NICs exist on that node

    @raise errors.OpPrereqError: if the disk template is not network
        mirrored or a bridge is missing on the target node
    @raise errors.ProgrammerError: if a mirrored instance has no
        secondary node

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondaries = instance.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)
    # the target must be able to hold the instance's memory
    reason = "failing over instance %s" % instance.name
    _CheckNodeFreeMemory(self, new_primary, reason,
                         be_params[constants.BE_MEMORY],
                         instance.hypervisor)

    # every bridge used by the instance must exist on the target
    bridges = []
    for nic in instance.nics:
      bridges.append(nic.bridge)
    bridge_check = self.rpc.call_bridges_exist(new_primary, bridges)
    bridge_check.Raise()
    if not bridge_check.data:
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (bridges, new_primary))

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if a disk is degraded on the target (and
        the instance is marked up and consistency is not ignored), or
        if the shutdown, disk deactivation, disk activation or
        instance start fails

    """
    instance = self.instance
    old_primary = instance.primary_node
    new_primary = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for disk in instance.disks:
      # for drbd, these are drbd over lvm
      if _CheckDiskConsistency(self, disk, new_primary, False):
        continue
      # a degraded disk is tolerated for instances marked down, or when
      # the user explicitly asked to ignore consistency
      if not instance.admin_up or self.op.ignore_consistency:
        continue
      raise errors.OpExecError("Disk %s is degraded on target node,"
                               " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, old_primary)

    shutdown = self.rpc.call_instance_shutdown(old_primary, instance)
    shutdown_err = shutdown.RemoteFailMsg()
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, old_primary, old_primary,
                           shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, instance, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, new_primary)

    disks_ok, _device_info = _AssembleInstanceDisks(self, instance,
                                                    ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    startup = self.rpc.call_instance_start(new_primary, instance)
    startup_err = startup.RemoteFailMsg()
    if startup_err:
      # do not leave the disks active if the instance could not start
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, new_primary, startup_err))
3554
3555
3556 class LUMigrateInstance(LogicalUnit):
3557   """Migrate an instance.
3558
3559   This is migration without shutting down, compared to the failover,
3560   which is done with shutdown.
3561
3562   """
3563   HPATH = "instance-migrate"
3564   HTYPE = constants.HTYPE_INSTANCE
3565   _OP_REQP = ["instance_name", "live", "cleanup"]
3566
3567   REQ_BGL = False
3568
3569   def ExpandNames(self):
3570     self._ExpandAndLockInstance()
3571     self.needed_locks[locking.LEVEL_NODE] = []
3572     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3573
3574   def DeclareLocks(self, level):
3575     if level == locking.LEVEL_NODE:
3576       self._LockInstancesNodes()
3577
3578   def BuildHooksEnv(self):
3579     """Build hooks env.
3580
3581     This runs on master, primary and secondary nodes of the instance.
3582
3583     """
3584     env = _BuildInstanceHookEnvByObject(self, self.instance)
3585     env["MIGRATE_LIVE"] = self.op.live
3586     env["MIGRATE_CLEANUP"] = self.op.cleanup
3587     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3588     return env, nl, nl
3589
3590   def CheckPrereq(self):
3591     """Check prerequisites.
3592
3593     This checks that the instance is in the cluster.
3594
3595     """
3596     instance = self.cfg.GetInstanceInfo(
3597       self.cfg.ExpandInstanceName(self.op.instance_name))
3598     if instance is None:
3599       raise errors.OpPrereqError("Instance '%s' not known" %
3600                                  self.op.instance_name)
3601
3602     if instance.disk_template != constants.DT_DRBD8:
3603       raise errors.OpPrereqError("Instance's disk layout is not"
3604                                  " drbd8, cannot migrate.")
3605
3606     secondary_nodes = instance.secondary_nodes
3607     if not secondary_nodes:
3608       raise errors.ConfigurationError("No secondary node but using"
3609                                       " drbd8 disk template")
3610
3611     i_be = self.cfg.GetClusterInfo().FillBE(instance)
3612
3613     target_node = secondary_nodes[0]
3614     # check memory requirements on the secondary node
3615     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3616                          instance.name, i_be[constants.BE_MEMORY],
3617                          instance.hypervisor)
3618
3619     # check bridge existance
3620     brlist = [nic.bridge for nic in instance.nics]
3621     result = self.rpc.call_bridges_exist(target_node, brlist)
3622     if result.failed or not result.data:
3623       raise errors.OpPrereqError("One or more target bridges %s does not"
3624                                  " exist on destination node '%s'" %
3625                                  (brlist, target_node))
3626
3627     if not self.op.cleanup:
3628       _CheckNodeNotDrained(self, target_node)
3629       result = self.rpc.call_instance_migratable(instance.primary_node,
3630                                                  instance)
3631       msg = result.RemoteFailMsg()
3632       if msg:
3633         raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3634                                    msg)
3635
3636     self.instance = instance
3637
3638   def _WaitUntilSync(self):
3639     """Poll with custom rpc for disk sync.
3640
3641     This uses our own step-based rpc call.
3642
3643     """
3644     self.feedback_fn("* wait until resync is done")
3645     all_done = False
3646     while not all_done:
3647       all_done = True
3648       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3649                                             self.nodes_ip,
3650                                             self.instance.disks)
3651       min_percent = 100
3652       for node, nres in result.items():
3653         msg = nres.RemoteFailMsg()
3654         if msg:
3655           raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3656                                    (node, msg))
3657         node_done, node_percent = nres.payload
3658         all_done = all_done and node_done
3659         if node_percent is not None:
3660           min_percent = min(min_percent, node_percent)
3661       if not all_done:
3662         if min_percent < 100:
3663           self.feedback_fn("   - progress: %.1f%%" % min_percent)
3664         time.sleep(2)
3665
3666   def _EnsureSecondary(self, node):
3667     """Demote a node to secondary.
3668
3669     """
3670     self.feedback_fn("* switching node %s to secondary mode" % node)
3671
3672     for dev in self.instance.disks:
3673       self.cfg.SetDiskID(dev, node)
3674
3675     result = self.rpc.call_blockdev_close(node, self.instance.name,
3676                                           self.instance.disks)
3677     msg = result.RemoteFailMsg()
3678     if msg:
3679       raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3680                                " error %s" % (node, msg))
3681
3682   def _GoStandalone(self):
3683     """Disconnect from the network.
3684
3685     """
3686     self.feedback_fn("* changing into standalone mode")
3687     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3688                                                self.instance.disks)
3689     for node, nres in result.items():
3690       msg = nres.RemoteFailMsg()
3691       if msg:
3692         raise errors.OpExecError("Cannot disconnect disks node %s,"
3693                                  " error %s" % (node, msg))
3694
3695   def _GoReconnect(self, multimaster):
3696     """Reconnect to the network.
3697
3698     """
3699     if multimaster:
3700       msg = "dual-master"
3701     else:
3702       msg = "single-master"
3703     self.feedback_fn("* changing disks into %s mode" % msg)
3704     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3705                                            self.instance.disks,
3706                                            self.instance.name, multimaster)
3707     for node, nres in result.items():
3708       msg = nres.RemoteFailMsg()
3709       if msg:
3710         raise errors.OpExecError("Cannot change disks config on node %s,"
3711                                  " error: %s" % (node, msg))
3712
3713   def _ExecCleanup(self):
3714     """Try to cleanup after a failed migration.
3715
3716     The cleanup is done by:
3717       - check that the instance is running only on one node
3718         (and update the config if needed)
3719       - change disks on its secondary node to secondary
3720       - wait until disks are fully synchronized
3721       - disconnect from the network
3722       - change disks into single-master mode
3723       - wait again until disks are fully synchronized
3724
3725     """
3726     instance = self.instance
3727     target_node = self.target_node
3728     source_node = self.source_node
3729
3730     # check running on only one node
3731     self.feedback_fn("* checking where the instance actually runs"
3732                      " (if this hangs, the hypervisor might be in"
3733                      " a bad state)")
3734     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3735     for node, result in ins_l.items():
3736       result.Raise()
3737       if not isinstance(result.data, list):
3738         raise errors.OpExecError("Can't contact node '%s'" % node)
3739
3740     runningon_source = instance.name in ins_l[source_node].data
3741     runningon_target = instance.name in ins_l[target_node].data
3742
3743     if runningon_source and runningon_target:
3744       raise errors.OpExecError("Instance seems to be running on two nodes,"
3745                                " or the hypervisor is confused. You will have"
3746                                " to ensure manually that it runs only on one"
3747                                " and restart this operation.")
3748
3749     if not (runningon_source or runningon_target):
3750       raise errors.OpExecError("Instance does not seem to be running at all."
3751                                " In this case, it's safer to repair by"
3752                                " running 'gnt-instance stop' to ensure disk"
3753                                " shutdown, and then restarting it.")
3754
3755     if runningon_target:
3756       # the migration has actually succeeded, we need to update the config
3757       self.feedback_fn("* instance running on secondary node (%s),"
3758                        " updating config" % target_node)
3759       instance.primary_node = target_node
3760       self.cfg.Update(instance)
3761       demoted_node = source_node
3762     else:
3763       self.feedback_fn("* instance confirmed to be running on its"
3764                        " primary node (%s)" % source_node)
3765       demoted_node = target_node
3766
3767     self._EnsureSecondary(demoted_node)
3768     try:
3769       self._WaitUntilSync()
3770     except errors.OpExecError:
3771       # we ignore here errors, since if the device is standalone, it
3772       # won't be able to sync
3773       pass
3774     self._GoStandalone()
3775     self._GoReconnect(False)
3776     self._WaitUntilSync()
3777
3778     self.feedback_fn("* done")
3779
3780   def _RevertDiskStatus(self):
3781     """Try to revert the disk status after a failed migration.
3782
3783     """
3784     target_node = self.target_node
3785     try:
3786       self._EnsureSecondary(target_node)
3787       self._GoStandalone()
3788       self._GoReconnect(False)
3789       self._WaitUntilSync()
3790     except errors.OpExecError, err:
3791       self.LogWarning("Migration failed and I can't reconnect the"
3792                       " drives: error '%s'\n"
3793                       "Please look and recover the instance status" %
3794                       str(err))
3795
3796   def _AbortMigration(self):
3797     """Call the hypervisor code to abort a started migration.
3798
3799     """
3800     instance = self.instance
3801     target_node = self.target_node
3802     migration_info = self.migration_info
3803
3804     abort_result = self.rpc.call_finalize_migration(target_node,
3805                                                     instance,
3806                                                     migration_info,
3807                                                     False)
3808     abort_msg = abort_result.RemoteFailMsg()
3809     if abort_msg:
3810       logging.error("Aborting migration failed on target node %s: %s" %
3811                     (target_node, abort_msg))
3812       # Don't raise an exception here, as we stil have to try to revert the
3813       # disk status, even if this step failed.
3814
3815   def _ExecMigration(self):
3816     """Migrate an instance.
3817
3818     The migrate is done by:
3819       - change the disks into dual-master mode
3820       - wait until disks are fully synchronized again
3821       - migrate the instance
3822       - change disks on the new secondary node (the old primary) to secondary
3823       - wait until disks are fully synchronized
3824       - change disks into single-master mode
3825
3826     """
3827     instance = self.instance
3828     target_node = self.target_node
3829     source_node = self.source_node
3830
3831     self.feedback_fn("* checking disk consistency between source and target")
3832     for dev in instance.disks:
3833       if not _CheckDiskConsistency(self, dev, target_node, False):
3834         raise errors.OpExecError("Disk %s is degraded or not fully"
3835                                  " synchronized on target node,"
3836                                  " aborting migrate." % dev.iv_name)
3837
3838     # First get the migration information from the remote node
3839     result = self.rpc.call_migration_info(source_node, instance)
3840     msg = result.RemoteFailMsg()
3841     if msg:
3842       log_err = ("Failed fetching source migration information from %s: %s" %
3843                  (source_node, msg))
3844       logging.error(log_err)
3845       raise errors.OpExecError(log_err)
3846
3847     self.migration_info = migration_info = result.payload
3848
3849     # Then switch the disks to master/master mode
3850     self._EnsureSecondary(target_node)
3851     self._GoStandalone()
3852     self._GoReconnect(True)
3853     self._WaitUntilSync()
3854
3855     self.feedback_fn("* preparing %s to accept the instance" % target_node)
3856     result = self.rpc.call_accept_instance(target_node,
3857                                            instance,
3858                                            migration_info,
3859                                            self.nodes_ip[target_node])
3860
3861     msg = result.RemoteFailMsg()
3862     if msg:
3863       logging.error("Instance pre-migration failed, trying to revert"
3864                     " disk status: %s", msg)
3865       self._AbortMigration()
3866       self._RevertDiskStatus()
3867       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
3868                                (instance.name, msg))
3869
3870     self.feedback_fn("* migrating instance to %s" % target_node)
3871     time.sleep(10)
3872     result = self.rpc.call_instance_migrate(source_node, instance,
3873                                             self.nodes_ip[target_node],
3874                                             self.op.live)
3875     msg = result.RemoteFailMsg()
3876     if msg:
3877       logging.error("Instance migration failed, trying to revert"
3878                     " disk status: %s", msg)
3879       self._AbortMigration()
3880       self._RevertDiskStatus()
3881       raise errors.OpExecError("Could not migrate instance %s: %s" %
3882                                (instance.name, msg))
3883     time.sleep(10)
3884
3885     instance.primary_node = target_node
3886     # distribute new instance config to the other nodes
3887     self.cfg.Update(instance)
3888
3889     result = self.rpc.call_finalize_migration(target_node,
3890                                               instance,
3891                                               migration_info,
3892                                               True)
3893     msg = result.RemoteFailMsg()
3894     if msg:
3895       logging.error("Instance migration succeeded, but finalization failed:"
3896                     " %s" % msg)
3897       raise errors.OpExecError("Could not finalize instance migration: %s" %
3898                                msg)
3899
3900     self._EnsureSecondary(source_node)
3901     self._WaitUntilSync()
3902     self._GoStandalone()
3903     self._GoReconnect(False)
3904     self._WaitUntilSync()
3905
3906     self.feedback_fn("* done")
3907
3908   def Exec(self, feedback_fn):
3909     """Perform the migration.
3910
3911     """
3912     self.feedback_fn = feedback_fn
3913
3914     self.source_node = self.instance.primary_node
3915     self.target_node = self.instance.secondary_nodes[0]
3916     self.all_nodes = [self.source_node, self.target_node]
3917     self.nodes_ip = {
3918       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
3919       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
3920       }
3921     if self.op.cleanup:
3922       return self._ExecCleanup()
3923     else:
3924       return self._ExecMigration()
3925
3926
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a block device and its children on a node, where needed.

  The device tree is walked depth-first: children are always visited
  before their parent. A device is actually created only if creation
  is forced, either by the caller or because the device itself (or one
  of its ancestors) reports C{CreateOnSecondary()}.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True for a device (and its subtree) as soon as
      the device's CreateOnSecondary() returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter is passed unchanged to
      L{_CreateSingleBlockDev} for every device that gets created

  """
  if device.CreateOnSecondary():
    force_create = True

  # children must exist before the device built on top of them
  for child in device.children or ():
    _CreateBlockDev(lu, node, instance, child, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
3967
3968
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create one block device on a node, without touching its children.

  Children of the device are not handled here, so they must have been
  created beforehand. If the device has no physical id yet, the
  payload returned by the creation call is stored as its physical id.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter is passed on to the blockdev
      creation rpc call
  @raise errors.OpExecError: if the node reports a creation failure

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure = creation.RemoteFailMsg()
  if failure:
    details = (device, node, instance.name, failure)
    raise errors.OpExecError("Can't create block device %s on"
                             " node %s for instance %s: %s" % details)

  # an already known physical id is never overwritten
  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
4000
4001
def _GenerateUniqueNames(lu, exts):
  """Generate unique names, one per given suffix.

  For every entry of C{exts} a fresh unique ID is requested from the
  configuration and the entry is appended to it.

  @param lu: the lu whose configuration provides the unique IDs
  @param exts: the suffixes to append, in order
  @return: list of the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(), suffix) for suffix in exts]
4013
4014
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration for
  the new device. The children are a data volume of the requested size
  and a metadata volume of size 128, both in the configured volume
  group.

  @param lu: the lu whose configuration is used for the allocations
  @param primary: first node entry of the drbd logical id
  @param secondary: second node entry of the drbd logical id
  @param size: size of the drbd device and of its data volume
  @param names: sequence holding the data volume name at index 0 and
      the metadata volume name at index 1
  @param iv_name: the iv_name to set on the drbd device
  @param p_minor: minor stored after the port in the logical id
  @param s_minor: minor stored after C{p_minor} in the logical id
  @return: the drbd8 L{objects.Disk}, with its children attached

  """
  drbd_port = lu.cfg.AllocatePort()
  volume_group = lu.cfg.GetVGName()
  secret = lu.cfg.GenerateDRBDSecret()

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(volume_group, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(volume_group, names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
4034
4035
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the list of disk objects for a given disk template.

  @param lu: the lu whose configuration is used for names and
      allocations
  @param template_name: one of the diskless, plain, drbd8 or file
      disk template constants
  @param instance_name: instance name, passed to the drbd minor
      allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must be empty for the
      plain and file templates and hold exactly one node for drbd8
  @param disk_info: sequence of dicts, each with a "size" and a "mode"
      key, one per disk
  @param file_storage_dir: directory used in file-based disk paths
  @param file_driver: driver stored in the logical id of file disks
  @param base_index: index of the first generated disk; disks are
      named "disk/N" starting from it
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % num
                                         for num in range(disk_count)])
    plain_disks = []
    for offset, (lv_name, params) in enumerate(zip(lv_names, disk_info)):
      plain_disks.append(objects.Disk(dev_type=constants.LD_LV,
                                      size=params["size"],
                                      logical_id=(vgname, lv_name),
                                      iv_name="disk/%d" %
                                      (offset + base_index),
                                      mode=params["mode"]))
    return plain_disks

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk, alternating primary and secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two LV names per disk: data first, then metadata
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, [".disk%d" % num
                                            for num in range(disk_count)]):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    drbd_disks = []
    for offset, params in enumerate(disk_info):
      first = offset * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      params["size"],
                                      lv_names[first:first + 2],
                                      "disk/%d" % (offset + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = params["mode"]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    file_disks = []
    for offset, params in enumerate(disk_info):
      number = offset + base_index
      file_path = "%s/disk%d" % (file_storage_dir, number)
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=params["size"],
                                     iv_name="disk/%d" % number,
                                     logical_id=(file_driver, file_path),
                                     mode=params["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4100
4101
def _GetInstanceInfoText(instance):
  """Build the text that should be added to the metadata of a disk.

  @param instance: object whose C{name} attribute is embedded in the text
  @return: the string C{originstname+} followed by the instance name

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
4107
4108
def _CreateDisks(lu, instance):
  """Create all disks of an instance on all of its nodes.

  For file-based instances the storage directory (taken from the path
  of the first disk) is created on the primary node first. Afterwards
  every disk is created on every node of the instance through
  L{_CreateBlockDev}.

  This function has no return statement; failures are reported through
  exceptions, and errors raised by L{_CreateBlockDev} are not handled
  here.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @raise errors.OpExecError: if the file storage directory could not be
      created on the primary node

  """
  info_text = _GetInstanceInfoText(instance)
  primary = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)

    if dir_result.failed or not dir_result.data:
      raise errors.OpExecError("Could not connect to node '%s'" % primary)

    if not dir_result.data[0]:
      raise errors.OpExecError("Failed to create directory '%s'" %
                               storage_dir)

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      # the same flag is passed twice and is only true on the primary node
      on_primary = (node == primary)
      _CreateBlockDev(lu, node, instance, device, on_primary, info_text,
                      on_primary)
4145
4146
def _RemoveDisks(lu, instance):
  """Remove all disks of an instance.

  The removal is best-effort: a device that cannot be removed only
  produces a warning and the remaining devices are still processed.
  For file-based instances the storage directory (taken from the path
  of the first disk) is removed from the primary node afterwards.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: C{True} if every removal succeeded, C{False} if at least
      one device or the file storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  success = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      fail_msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
      if not fail_msg:
        continue
      lu.LogWarning("Could not remove block device %s on node %s,"
                    " continuing anyway: %s", device.iv_name, node, fail_msg)
      success = False

  if instance.disk_template != constants.DT_FILE:
    return success

  storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
  dir_result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                   storage_dir)
  if dir_result.failed or not dir_result.data:
    logging.error("Could not remove directory '%s'", storage_dir)
    success = False

  return success
4184
4185
def _ComputeDiskSize(disk_template, disks):
  """Compute the space an instance's disks require in the volume group.

  @param disk_template: the disk template (one of C{constants.DT_*})
  @param disks: iterable of dicts, each with a "size" key
  @return: the total required size, or C{None} for the diskless and
      file templates
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  # Both totals are computed up front, whatever the template is
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required_by_template:
    return required_by_template[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
4204
4205
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a list of nodes.

  The validation is shared between instance creation and instance
  modification. Nodes whose result is marked offline are skipped.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  node_results = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                        hvname,
                                                        hvparams)
  for name in nodenames:
    node_result = node_results[name]
    if not node_result.offline:
      fail_msg = node_result.RemoteFailMsg()
      if fail_msg:
        raise errors.OpPrereqError("Hypervisor parameter validation failed:"
                                   " %s" % fail_msg)
4234
4235
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  Two creation modes are accepted (see L{ExpandNames}): creating a new
  instance (C{constants.INSTANCE_CREATE}) and importing one from an
  export (C{constants.INSTANCE_IMPORT}).

  """
  # hook path and hook type, redefined as required of LogicalUnit subclasses
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): presumably the opcode fields that must be present;
  # confirm against the LogicalUnit base class
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  # this LU does not need to hold the Big Ganeti Lock exclusively
  REQ_BGL = False
4247
4248   def _ExpandNode(self, node):
4249     """Expands and checks one node name.
4250
4251     """
4252     node_full = self.cfg.ExpandNodeName(node)
4253     if node_full is None:
4254       raise errors.OpPrereqError("Unknown node %s" % node)
4255     return node_full
4256
4257   def ExpandNames(self):
4258     """ExpandNames for CreateInstance.
4259
4260     Figure out the right locks for instance creation.
4261
4262     """
4263     self.needed_locks = {}
4264
4265     # set optional parameters to none if they don't exist
4266     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4267       if not hasattr(self.op, attr):
4268         setattr(self.op, attr, None)
4269
4270     # cheap checks, mostly valid constants given
4271
4272     # verify creation mode
4273     if self.op.mode not in (constants.INSTANCE_CREATE,
4274                             constants.INSTANCE_IMPORT):
4275       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4276                                  self.op.mode)
4277
4278     # disk template and mirror node verification
4279     if self.op.disk_template not in constants.DISK_TEMPLATES:
4280       raise errors.OpPrereqError("Invalid disk template name")
4281
4282     if self.op.hypervisor is None:
4283       self.op.hypervisor = self.cfg.GetHypervisorType()
4284
4285     cluster = self.cfg.GetClusterInfo()
4286     enabled_hvs = cluster.enabled_hypervisors
4287     if self.op.hypervisor not in enabled_hvs:
4288       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4289                                  " cluster (%s)" % (self.op.hypervisor,
4290                                   ",".join(enabled_hvs)))
4291
4292     # check hypervisor parameter syntax (locally)
4293     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4294     filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
4295                                   self.op.hvparams)
4296     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4297     hv_type.CheckParameterSyntax(filled_hvp)
4298
4299     # fill and remember the beparams dict
4300     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4301     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
4302                                     self.op.beparams)
4303
4304     #### instance parameters check
4305
4306     # instance name verification
4307     hostname1 = utils.HostInfo(self.op.instance_name)
4308     self.op.instance_name = instance_name = hostname1.name
4309
4310     # this is just a preventive check, but someone might still add this
4311     # instance in the meantime, and creation will fail at lock-add time
4312     if instance_name in self.cfg.GetInstanceList():
4313       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4314                                  instance_name)
4315
4316     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4317
4318     # NIC buildup
4319     self.nics = []
4320     for nic in self.op.nics:
4321       # ip validity checks
4322       ip = nic.get("ip", None)
4323       if ip is None or ip.lower() == "none":
4324         nic_ip = None
4325       elif ip.lower() == constants.VALUE_AUTO:
4326         nic_ip = hostname1.ip
4327       else:
4328         if not utils.IsValidIP(ip):
4329           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4330                                      " like a valid IP" % ip)
4331         nic_ip = ip
4332
4333       # MAC address verification
4334       mac = nic.get("mac", constants.VALUE_AUTO)
4335       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4336         if not utils.IsValidMac(mac.lower()):
4337           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4338                                      mac)
4339       # bridge verification
4340       bridge = nic.get("bridge", None)
4341       if bridge is None:
4342         bridge = self.cfg.GetDefBridge()
4343       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
4344
4345     # disk checks/pre-build
4346     self.disks = []
4347     for disk in self.op.disks:
4348       mode = disk.get("mode", constants.DISK_RDWR)
4349       if mode not in constants.DISK_ACCESS_SET:
4350         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4351                                    mode)
4352       size = disk.get("size", None)
4353       if size is None:
4354         raise errors.OpPrereqError("Missing disk size")
4355       try:
4356         size = int(size)
4357       except ValueError:
4358         raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4359       self.disks.append({"size": size, "mode": mode})
4360
4361     # used in CheckPrereq for ip ping check
4362     self.check_ip = hostname1.ip
4363
4364     # file storage checks
4365     if (self.op.file_driver and
4366         not self.op.file_driver in constants.FILE_DRIVER):
4367       raise errors.OpPrereqError("Invalid file driver name '%s'" %
4368                                  self.op.file_driver)
4369
4370     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4371       raise errors.OpPrereqError("File storage directory path not absolute")
4372
4373     ### Node/iallocator related checks
4374     if [self.op.iallocator, self.op.pnode].count(None) != 1:
4375       raise errors.OpPrereqError("One and only one of iallocator and primary"
4376                                  " node must be given")
4377
4378     if self.op.iallocator:
4379       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4380     else:
4381       self.op.pnode = self._ExpandNode(self.op.pnode)
4382       nodelist = [self.op.pnode]
4383       if self.op.snode is not None:
4384         self.op.snode = self._ExpandNode(self.op.snode)
4385         nodelist.append(self.op.snode)
4386       self.needed_locks[locking.LEVEL_NODE] = nodelist
4387
4388     # in case of import lock the source node too
4389     if self.op.mode == constants.INSTANCE_IMPORT:
4390       src_node = getattr(self.op, "src_node", None)
4391       src_path = getattr(self.op, "src_path", None)
4392
4393       if src_path is None:
4394         self.op.src_path = src_path = self.op.instance_name
4395
4396       if src_node is None:
4397         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4398         self.op.src_node = None
4399         if os.path.isabs(src_path):
4400           raise errors.OpPrereqError("Importing an instance from an absolute"
4401                                      " path requires a source node option.")
4402       else:
4403         self.op.src_node = src_node = self._ExpandNode(src_node)
4404         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4405           self.needed_locks[locking.LEVEL_NODE].append(src_node)
4406         if not os.path.isabs(src_path):
4407           self.op.src_path = src_path = \
4408             os.path.join(constants.EXPORT_DIR, src_path)
4409
4410     else: # INSTANCE_CREATE
4411       if getattr(self.op, "os_type", None) is None:
4412         raise errors.OpPrereqError("No guest OS specified")
4413
4414   def _RunAllocator(self):
4415     """Run the allocator based on input opcode.
4416
4417     """
4418     nics = [n.ToDict() for n in self.nics]
4419     ial = IAllocator(self,
4420                      mode=constants.IALLOCATOR_MODE_ALLOC,
4421                      name=self.op.instance_name,
4422                      disk_template=self.op.disk_template,
4423                      tags=[],
4424                      os=self.op.os_type,
4425                      vcpus=self.be_full[constants.BE_VCPUS],
4426                      mem_size=self.be_full[constants.BE_MEMORY],
4427                      disks=self.disks,
4428                      nics=nics,
4429                      hypervisor=self.op.hypervisor,
4430                      )
4431
4432     ial.Run(self.op.iallocator)
4433
4434     if not ial.success:
4435       raise errors.OpPrereqError("Can't compute nodes using"
4436                                  " iallocator '%s': %s" % (self.op.iallocator,
4437                                                            ial.info))
4438     if len(ial.nodes) != ial.required_nodes:
4439       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4440                                  " of nodes (%s), required %s" %
4441                                  (self.op.iallocator, len(ial.nodes),
4442                                   ial.required_nodes))
4443     self.op.pnode = ial.nodes[0]
4444     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4445                  self.op.instance_name, self.op.iallocator,
4446                  ", ".join(ial.nodes))
4447     if ial.required_nodes == 2:
4448       self.op.snode = ial.nodes[1]
4449
4450   def BuildHooksEnv(self):
4451     """Build hooks env.
4452
4453     This runs on master, primary and secondary nodes of the instance.
4454
4455     """
4456     env = {
4457       "ADD_MODE": self.op.mode,
4458       }
4459     if self.op.mode == constants.INSTANCE_IMPORT:
4460       env["SRC_NODE"] = self.op.src_node
4461       env["SRC_PATH"] = self.op.src_path
4462       env["SRC_IMAGES"] = self.src_images
4463
4464     env.update(_BuildInstanceHookEnv(
4465       name=self.op.instance_name,
4466       primary_node=self.op.pnode,
4467       secondary_nodes=self.secondaries,
4468       status=self.op.start,
4469       os_type=self.op.os_type,
4470       memory=self.be_full[constants.BE_MEMORY],
4471       vcpus=self.be_full[constants.BE_VCPUS],
4472       nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
4473       disk_template=self.op.disk_template,
4474       disks=[(d["size"], d["mode"]) for d in self.disks],
4475     ))
4476
4477     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4478           self.secondaries)
4479     return env, nl, nl
4480
4481
4482   def CheckPrereq(self):
4483     """Check prerequisites.
4484
4485     """
4486     if (not self.cfg.GetVGName() and
4487         self.op.disk_template not in constants.DTS_NOT_LVM):
4488       raise errors.OpPrereqError("Cluster does not support lvm-based"
4489                                  " instances")
4490
4491     if self.op.mode == constants.INSTANCE_IMPORT:
4492       src_node = self.op.src_node
4493       src_path = self.op.src_path
4494
4495       if src_node is None:
4496         exp_list = self.rpc.call_export_list(
4497           self.acquired_locks[locking.LEVEL_NODE])
4498         found = False
4499         for node in exp_list:
4500           if not exp_list[node].failed and src_path in exp_list[node].data:
4501             found = True
4502             self.op.src_node = src_node = node
4503             self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4504                                                        src_path)
4505             break
4506         if not found:
4507           raise errors.OpPrereqError("No export found for relative path %s" %
4508                                       src_path)
4509
4510       _CheckNodeOnline(self, src_node)
4511       result = self.rpc.call_export_info(src_node, src_path)
4512       result.Raise()
4513       if not result.data:
4514         raise errors.OpPrereqError("No export found in dir %s" % src_path)
4515
4516       export_info = result.data
4517       if not export_info.has_section(constants.INISECT_EXP):
4518         raise errors.ProgrammerError("Corrupted export config")
4519
4520       ei_version = export_info.get(constants.INISECT_EXP, 'version')
4521       if (int(ei_version) != constants.EXPORT_VERSION):
4522         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4523                                    (ei_version, constants.EXPORT_VERSION))
4524
4525       # Check that the new instance doesn't have less disks than the export
4526       instance_disks = len(self.disks)
4527       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4528       if instance_disks < export_disks:
4529         raise errors.OpPrereqError("Not enough disks to import."
4530                                    " (instance: %d, export: %d)" %
4531                                    (instance_disks, export_disks))
4532
4533       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4534       disk_images = []
4535       for idx in range(export_disks):
4536         option = 'disk%d_dump' % idx
4537         if export_info.has_option(constants.INISECT_INS, option):
4538           # FIXME: are the old os-es, disk sizes, etc. useful?
4539           export_name = export_info.get(constants.INISECT_INS, option)
4540           image = os.path.join(src_path, export_name)
4541           disk_images.append(image)
4542         else:
4543           disk_images.append(False)
4544
4545       self.src_images = disk_images
4546
4547       old_name = export_info.get(constants.INISECT_INS, 'name')
4548       # FIXME: int() here could throw a ValueError on broken exports
4549       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4550       if self.op.instance_name == old_name:
4551         for idx, nic in enumerate(self.nics):
4552           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4553             nic_mac_ini = 'nic%d_mac' % idx
4554             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4555
4556     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4557     # ip ping checks (we use the same ip that was resolved in ExpandNames)
4558     if self.op.start and not self.op.ip_check:
4559       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4560                                  " adding an instance in start mode")
4561
4562     if self.op.ip_check:
4563       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4564         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4565                                    (self.check_ip, self.op.instance_name))
4566
4567     #### mac address generation
4568     # By generating here the mac address both the allocator and the hooks get
4569     # the real final mac address rather than the 'auto' or 'generate' value.
4570     # There is a race condition between the generation and the instance object
4571     # creation, which means that we know the mac is valid now, but we're not
4572     # sure it will be when we actually add the instance. If things go bad
4573     # adding the instance will abort because of a duplicate mac, and the
4574     # creation job will fail.
4575     for nic in self.nics:
4576       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4577         nic.mac = self.cfg.GenerateMAC()
4578
4579     #### allocator run
4580
4581     if self.op.iallocator is not None:
4582       self._RunAllocator()
4583
4584     #### node related checks
4585
4586     # check primary node
4587     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4588     assert self.pnode is not None, \
4589       "Cannot retrieve locked node %s" % self.op.pnode
4590     if pnode.offline:
4591       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4592                                  pnode.name)
4593     if pnode.drained:
4594       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4595                                  pnode.name)
4596
4597     self.secondaries = []
4598
4599     # mirror node verification
4600     if self.op.disk_template in constants.DTS_NET_MIRROR:
4601       if self.op.snode is None:
4602         raise errors.OpPrereqError("The networked disk templates need"
4603                                    " a mirror node")
4604       if self.op.snode == pnode.name:
4605         raise errors.OpPrereqError("The secondary node cannot be"
4606                                    " the primary node.")
4607       _CheckNodeOnline(self, self.op.snode)
4608       _CheckNodeNotDrained(self, self.op.snode)
4609       self.secondaries.append(self.op.snode)
4610
4611     nodenames = [pnode.name] + self.secondaries
4612
4613     req_size = _ComputeDiskSize(self.op.disk_template,
4614                                 self.disks)
4615
4616     # Check lv size requirements
4617     if req_size is not None:
4618       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4619                                          self.op.hypervisor)
4620       for node in nodenames:
4621         info = nodeinfo[node]
4622         info.Raise()
4623         info = info.data
4624         if not info:
4625           raise errors.OpPrereqError("Cannot get current information"
4626                                      " from node '%s'" % node)
4627         vg_free = info.get('vg_free', None)
4628         if not isinstance(vg_free, int):
4629           raise errors.OpPrereqError("Can't compute free disk space on"
4630                                      " node %s" % node)
4631         if req_size > info['vg_free']:
4632           raise errors.OpPrereqError("Not enough disk space on target node %s."
4633                                      " %d MB available, %d MB required" %
4634                                      (node, info['vg_free'], req_size))
4635
4636     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4637
4638     # os verification
4639     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4640     result.Raise()
4641     if not isinstance(result.data, objects.OS):
4642       raise errors.OpPrereqError("OS '%s' not in supported os list for"
4643                                  " primary node"  % self.op.os_type)
4644
4645     # bridge check on primary node
4646     bridges = [n.bridge for n in self.nics]
4647     result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
4648     result.Raise()
4649     if not result.data:
4650       raise errors.OpPrereqError("One of the target bridges '%s' does not"
4651                                  " exist on destination node '%s'" %
4652                                  (",".join(bridges), pnode.name))
4653
4654     # memory check on primary node
4655     if self.op.start:
4656       _CheckNodeFreeMemory(self, self.pnode.name,
4657                            "creating instance %s" % self.op.instance_name,
4658                            self.be_full[constants.BE_MEMORY],
4659                            self.op.hypervisor)
4660
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are: generate the disk objects, create the disks on the
    nodes, add the instance to the configuration, release the node
    locks, check or wait for the disk sync, run the OS create or import
    scripts and finally, if requested, start the instance.

    @param feedback_fn: function called with progress messages
    @raise errors.OpExecError: if the disk creation fails, the disks
        are degraded, the OS creation fails or the instance does not
        start

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # only the hypervisors listed in HTS_REQ_PORT get a network port
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    # the disks of a new instance are numbered starting from zero
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is created as administratively down; it is only
    # marked up at the end, if a start was requested
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      # roll back: remove whatever was created and give back the DRBD
      # minors, then re-raise
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # on import only the source node lock is kept
      # NOTE(review): presumably because the import below still reads
      # from the source node; confirm
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo the creation: disks, configuration entry and instance lock
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    # diskless instances skip the OS scripts entirely
    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj)
        msg = result.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Could not add os for instance %s"
                                   " on node %s: %s" %
                                   (instance, pnode_name, msg))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        import_result.Raise()
        # a failed import of a single image only produces a warning
        for idx, result in enumerate(import_result.data):
          if not result:
            self.LogWarning("Could not import the image %s for instance"
                            " %s, disk %d, on node %s" %
                            (src_images[idx], instance, idx, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new state in the configuration before starting
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not start instance: %s" % msg)
4796
4797
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This LU does not open the console itself: it returns the command
  line which has to be run on the master node in order to connect to
  the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Build the command used to connect to the console of an instance.

    @param feedback_fn: not used by this LU
    @return: the ssh command line built by C{self.ssh.BuildCmd}
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise()

    if inst.name not in running.data:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    console_cmd = hv_class.GetShellCommandForConsole(inst, filled_hv,
                                                     filled_be)

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
4849
4850
4851 class LUReplaceDisks(LogicalUnit):
4852   """Replace the disks of an instance.
4853
4854   """
4855   HPATH = "mirrors-replace"
4856   HTYPE = constants.HTYPE_INSTANCE
4857   _OP_REQP = ["instance_name", "mode", "disks"]
4858   REQ_BGL = False
4859
4860   def CheckArguments(self):
4861     if not hasattr(self.op, "remote_node"):
4862       self.op.remote_node = None
4863     if not hasattr(self.op, "iallocator"):
4864       self.op.iallocator = None
4865
4866     # check for valid parameter combination
4867     cnt = [self.op.remote_node, self.op.iallocator].count(None)
4868     if self.op.mode == constants.REPLACE_DISK_CHG:
4869       if cnt == 2:
4870         raise errors.OpPrereqError("When changing the secondary either an"
4871                                    " iallocator script must be used or the"
4872                                    " new node given")
4873       elif cnt == 0:
4874         raise errors.OpPrereqError("Give either the iallocator or the new"
4875                                    " secondary, not both")
4876     else: # not replacing the secondary
4877       if cnt != 2:
4878         raise errors.OpPrereqError("The iallocator and new node options can"
4879                                    " be used only when changing the"
4880                                    " secondary node")
4881
4882   def ExpandNames(self):
4883     self._ExpandAndLockInstance()
4884
4885     if self.op.iallocator is not None:
4886       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4887     elif self.op.remote_node is not None:
4888       remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
4889       if remote_node is None:
4890         raise errors.OpPrereqError("Node '%s' not known" %
4891                                    self.op.remote_node)
4892       self.op.remote_node = remote_node
4893       # Warning: do not remove the locking of the new secondary here
4894       # unless DRBD8.AddChildren is changed to work in parallel;
4895       # currently it doesn't since parallel invocations of
4896       # FindUnusedMinor will conflict
4897       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
4898       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4899     else:
4900       self.needed_locks[locking.LEVEL_NODE] = []
4901       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4902
4903   def DeclareLocks(self, level):
4904     # If we're not already locking all nodes in the set we have to declare the
4905     # instance's primary/secondary nodes.
4906     if (level == locking.LEVEL_NODE and
4907         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
4908       self._LockInstancesNodes()
4909
4910   def _RunAllocator(self):
4911     """Compute a new secondary node using an IAllocator.
4912
4913     """
4914     ial = IAllocator(self,
4915                      mode=constants.IALLOCATOR_MODE_RELOC,
4916                      name=self.op.instance_name,
4917                      relocate_from=[self.sec_node])
4918
4919     ial.Run(self.op.iallocator)
4920
4921     if not ial.success:
4922       raise errors.OpPrereqError("Can't compute nodes using"
4923                                  " iallocator '%s': %s" % (self.op.iallocator,
4924                                                            ial.info))
4925     if len(ial.nodes) != ial.required_nodes:
4926       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4927                                  " of nodes (%s), required %s" %
4928                                  (len(ial.nodes), ial.required_nodes))
4929     self.op.remote_node = ial.nodes[0]
4930     self.LogInfo("Selected new secondary for the instance: %s",
4931                  self.op.remote_node)
4932
4933   def BuildHooksEnv(self):
4934     """Build hooks env.
4935
4936     This runs on the master, the primary and all the secondaries.
4937
4938     """
4939     env = {
4940       "MODE": self.op.mode,
4941       "NEW_SECONDARY": self.op.remote_node,
4942       "OLD_SECONDARY": self.instance.secondary_nodes[0],
4943       }
4944     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4945     nl = [
4946       self.cfg.GetMasterNode(),
4947       self.instance.primary_node,
4948       ]
4949     if self.op.remote_node is not None:
4950       nl.append(self.op.remote_node)
4951     return env, nl, nl
4952
4953   def CheckPrereq(self):
4954     """Check prerequisites.
4955
4956     This checks that the instance is in the cluster.
4957
4958     """
4959     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4960     assert instance is not None, \
4961       "Cannot retrieve locked instance %s" % self.op.instance_name
4962     self.instance = instance
4963
4964     if instance.disk_template != constants.DT_DRBD8:
4965       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
4966                                  " instances")
4967
4968     if len(instance.secondary_nodes) != 1:
4969       raise errors.OpPrereqError("The instance has a strange layout,"
4970                                  " expected one secondary but found %d" %
4971                                  len(instance.secondary_nodes))
4972
4973     self.sec_node = instance.secondary_nodes[0]
4974
4975     if self.op.iallocator is not None:
4976       self._RunAllocator()
4977
4978     remote_node = self.op.remote_node
4979     if remote_node is not None:
4980       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
4981       assert self.remote_node_info is not None, \
4982         "Cannot retrieve locked node %s" % remote_node
4983     else:
4984       self.remote_node_info = None
4985     if remote_node == instance.primary_node:
4986       raise errors.OpPrereqError("The specified node is the primary node of"
4987                                  " the instance.")
4988     elif remote_node == self.sec_node:
4989       raise errors.OpPrereqError("The specified node is already the"
4990                                  " secondary node of the instance.")
4991
4992     if self.op.mode == constants.REPLACE_DISK_PRI:
4993       n1 = self.tgt_node = instance.primary_node
4994       n2 = self.oth_node = self.sec_node
4995     elif self.op.mode == constants.REPLACE_DISK_SEC:
4996       n1 = self.tgt_node = self.sec_node
4997       n2 = self.oth_node = instance.primary_node
4998     elif self.op.mode == constants.REPLACE_DISK_CHG:
4999       n1 = self.new_node = remote_node
5000       n2 = self.oth_node = instance.primary_node
5001       self.tgt_node = self.sec_node
5002       _CheckNodeNotDrained(self, remote_node)
5003     else:
5004       raise errors.ProgrammerError("Unhandled disk replace mode")
5005
5006     _CheckNodeOnline(self, n1)
5007     _CheckNodeOnline(self, n2)
5008
5009     if not self.op.disks:
5010       self.op.disks = range(len(instance.disks))
5011
5012     for disk_idx in self.op.disks:
5013       instance.FindDisk(disk_idx)
5014
  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for drbd8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced-<time_t>)

    Failures are not very well handled.

    The nodes worked on are C{self.tgt_node} (where the LVs are replaced)
    and C{self.oth_node} (its peer), and only the disk indices listed in
    C{self.op.disks} are touched.

    @param feedback_fn: feedback callback; not used by this method
    @raise errors.OpExecError: if any of the checks or any of the
        non-cleanup steps fails

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    # maps the iv_name of each replaced disk to (drbd dev, old LVs, new LVs)
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    oth_node = self.oth_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([oth_node, tgt_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    # the cluster volume group must be reported by both nodes
    for node in oth_node, tgt_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    # every disk to be replaced must be found on both nodes
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      for node in tgt_node, oth_node:
        info("checking disk/%d on %s" % (idx, node))
        cfg.SetDiskID(dev, node)
        result = self.rpc.call_blockdev_find(node, dev)
        msg = result.RemoteFailMsg()
        # a successful call with an empty payload is treated as an error too
        if not msg and not result.payload:
          msg = "disk not found"
        if msg:
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, oth_node))
      if not _CheckDiskConsistency(self, dev, oth_node,
                                   oth_node==instance.primary_node):
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
                                 " to replace disks on this node (%s)" %
                                 (oth_node, tgt_node))

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".disk%d_%s" % (idx, suf)
                  for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(self, lv_names)
      # the data LV gets the size of the drbd device, the meta LV a fixed
      # size of 128 (presumably MiB -- TODO confirm the unit)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      info("creating new local storage on %s for %s" %
           (tgt_node, dev.iv_name))
      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step: for each lv, detach+rename*2+attach
    self.proc.LogStep(4, steps_total, "change drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      info("detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Can't detach drbd from local storage on node"
                                 " %s for device %s" % (tgt_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      # builds the (vg, "<lv name>_replaced-<suffix>") id of a replaced LV
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      # build the rename list based on what LVs exist on the node
      rlist = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
        if not result.RemoteFailMsg() and result.payload:
          # device exists
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))

      info("renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
      # now we rename the new LVs to the old LVs
      info("renaming the new LVs on the target node")
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)

      # mirror the renames done on the node in the in-memory disk objects:
      # the new LVs take over the ids of the old ones...
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        cfg.SetDiskID(new, tgt_node)

      # ...and the old LVs get the "_replaced-<suffix>" ids
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        cfg.SetDiskID(disk, tgt_node)

      # now that the new lvs have the old name, we can add them to the device
      info("adding new mirror component on %s" % tgt_node)
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
      if result.failed or not result.data:
        # best-effort rollback: try to remove the new LVs and only warn if
        # that fails, the operation is aborted in any case
        for new_lv in new_lvs:
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
          if msg:
            warning("Can't rollback device %s: %s", dev, msg,
                    hint="cleanup manually the unused logical volumes")
        raise errors.OpExecError("Can't add local storage to drbd")

      # record the new children in the configuration
      dev.children = new_lvs
      cfg.Update(instance)

    # Step: wait for sync

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, instance.primary_node)
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
      # element 5 of the payload is used here as the "degraded" indicator
      if result.payload[5]:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    # Step: remove old storage
    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, tgt_node)
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
        if msg:
          # a failed removal only produces a warning, the replace is done
          warning("Can't remove old LV: %s" % msg,
                  hint="manually remove unused LVs")
          continue
5206
  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    The old secondary is C{self.tgt_node}, the new one C{self.new_node}.

    @param feedback_fn: feedback callback; not used by this method
    @raise errors.OpExecError: if any of the checks or any of the
        non-cleanup steps fails

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    # maps the disk index to (drbd dev, its LV children, new logical id)
    iv_names = {}
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node
    # secondary IPs of the three nodes, passed to the drbd network RPCs
    nodes_ip = {
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
      }

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([pri_node, new_node])
    # the cluster volume group must be reported by both nodes
    for node in pri_node, new_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d on %s" % (idx, pri_node))
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      # a successful call with an empty payload is treated as an error too
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                 (idx, pri_node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, pri_node))
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    # note: from here on all the disks of the instance are processed, not
    # only those listed in self.op.disks
    for idx, dev in enumerate(instance.disks):
      info("adding new local storage on %s for disk/%d" %
           (new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self, new_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
                                   instance.name)
    logging.debug("Allocated minors %s" % (minors,))
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
      size = dev.size
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      # keep the minor already used on the primary, whichever position the
      # primary node has in the old logical id
      if pri_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children)
      try:
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
                              _GetInstanceInfoText(instance), False)
      except errors.GenericError:
        # give back the minors reserved above before propagating the error
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    for idx, dev in enumerate(instance.disks):
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for disk/%d on old node" % idx)
      cfg.SetDiskID(dev, old_node)
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
      if msg:
        # a failed shutdown only produces a warning, the replace goes on
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
                (idx, msg),
                hint="Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
                                               instance.disks)[pri_node]

    msg = result.RemoteFailMsg()
    if msg:
      # detaches didn't succeed (unlikely)
      # NOTE(review): the RPC is sent to the primary node although the
      # message below mentions the old node -- confirm the intended wording
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
                                           instance.disks, instance.name,
                                           False)
    # attach failures are only reported as warnings
    for to_node, to_result in result.items():
      msg = to_result.RemoteFailMsg()
      if msg:
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
                hint="please do a gnt-instance info to see the"
                " status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
                                 (idx, msg))
      # element 5 of the payload is used here as the "degraded" indicator
      if result.payload[5]:
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      info("remove logical volumes for disk/%d" % idx)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
        if msg:
          # a failed removal only produces a warning, the replace is done
          warning("Can't remove LV on old secondary: %s", msg,
                  hint="Cleanup stale volumes by hand")
5390
5391   def Exec(self, feedback_fn):
5392     """Execute disk replacement.
5393
5394     This dispatches the disk replacement to the appropriate handler.
5395
5396     """
5397     instance = self.instance
5398
5399     # Activate the instance disks if we're replacing them on a down instance
5400     if not instance.admin_up:
5401       _StartInstanceDisks(self, instance, True)
5402
5403     if self.op.mode == constants.REPLACE_DISK_CHG:
5404       fn = self._ExecD8Secondary
5405     else:
5406       fn = self._ExecD8DiskOnly
5407
5408     ret = fn(feedback_fn)
5409
5410     # Deactivate the instance disks if we're replacing them on a down instance
5411     if not instance.admin_up:
5412       _SafeShutdownInstanceDisks(self, instance)
5413
5414     return ret
5415
5416
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and an empty, replaceable node lock list.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the instance's node locks when at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks node list holds the master node and the instance's primary
    node.

    @return: tuple of (environment dict, node list, node list); the same
        node list object is returned in both node positions

    """
    hook_env = {}
    hook_env["DISK"] = self.op.disk
    hook_env["AMOUNT"] = self.op.amount
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Requires all the nodes of the instance to pass the online check, the
    disk template to be plain or drbd8, the requested disk to be found
    and every node to report at least the requested amount of free space
    in the volume group.

    @raise errors.OpPrereqError: if the layout does not support growing
        or the free space cannot be determined or is not enough

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    node_list = list(inst.all_nodes)
    for name in node_list:
      _CheckNodeOnline(self, name)

    self.instance = inst

    growable = (constants.DT_PLAIN, constants.DT_DRBD8)
    if inst.disk_template not in growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = inst.FindDisk(self.op.disk)

    node_data = self.rpc.call_node_info(node_list, self.cfg.GetVGName(),
                                        inst.hypervisor)
    for name in node_list:
      reply = node_data[name]
      if reply.failed or not reply.data:
        raise errors.OpPrereqError("Cannot get current information"
                                   " from node '%s'" % name)
      free_space = reply.data.get('vg_free', None)
      # anything but an integer means the node could not report the value
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % name)
      if self.op.amount > free_space:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (name, free_space, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    Sends the grow request to every node of the instance, records the new
    size in the configuration and optionally waits for the disks to sync.

    @param feedback_fn: feedback callback; not used by this method
    @raise errors.OpExecError: if a node reports a failed grow request

    """
    inst = self.instance
    target = self.disk
    for name in inst.all_nodes:
      self.cfg.SetDiskID(target, name)
      reply = self.rpc.call_blockdev_grow(name, target, self.op.amount)
      failure = reply.RemoteFailMsg()
      if failure:
        raise errors.OpExecError("Grow request failed to node %s: %s" %
                                 (name, failure))

    # all nodes accepted the request, make the new size permanent
    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst)

    if not self.op.wait_for_sync:
      return
    if not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
5510
5511
class LUQueryInstanceData(NoHooksLU):
  """Collect configuration and, optionally, live data about instances.

  For every selected instance the result contains its configuration
  values. Unless the opcode is flagged as static, the run state and the
  per-disk status are additionally fetched from the nodes via RPC.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the needed locks.

    An empty instance list selects all instances. The node locks are
    left empty here and filled in later by L{DeclareLocks}.

    @raise errors.OpPrereqError: if the 'instances' argument is not a
        list or contains an unknown instance name

    """
    self.needed_locks = {}
    self.share_locks = dict([(level, 1) for level in locking.LEVELS])

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if not requested:
      # no explicit selection: the names are resolved in CheckPrereq from
      # the locks that were actually acquired
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        long_name = self.cfg.ExpandInstanceName(short_name)
        if long_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name)
        expanded.append(long_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the instance locks are known."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Resolve the final list of instance objects to report on.

    When no explicit names were given, the names of all instances whose
    lock was acquired are used.

    """
    names = self.wanted_names
    if names is None:
      names = self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    instances = []
    for inst_name in names:
      instances.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = instances

  def _FindDiskOnNode(self, instance, node, dev):
    """Query the status of one block device on one node.

    @return: the payload of the blockdev_find call, or None if the RPC
        result is flagged as offline
    @raise errors.OpExecError: if the RPC call reports a failure

    """
    self.cfg.SetDiskID(dev, node)
    found = self.rpc.call_blockdev_find(node, dev)
    if found.offline:
      return None
    fail_msg = found.RemoteFailMsg()
    if fail_msg:
      raise errors.OpExecError("Can't compute disk status for %s: %s" %
                               (instance.name, fail_msg))
    return found.payload

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Build the status dictionary of a block device and its children.

    @param instance: the instance owning the device
    @param snode: the secondary node to query, or a false value for
        none; for DRBD devices it is overridden by the peer taken from
        the device's logical_id
    @param dev: the block device to describe
    @return: a dict with the static device data plus the "pstatus" and
        "sstatus" entries (both None in static mode) and the list of
        children, computed recursively

    """
    live = not self.op.static

    primary_status = None
    if live:
      primary_status = self._FindDiskOnNode(instance, instance.primary_node,
                                            dev)

    if dev.dev_type in constants.LDS_DRBD:
      # the secondary is whichever end of the DRBD pair is not the primary
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    secondary_status = None
    if snode and live:
      secondary_status = self._FindDiskOnNode(instance, snode, dev)

    child_data = []
    if dev.children:
      for child in dev.children:
        child_data.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_data,
      "mode": dev.mode,
      }

  def _GetRunState(self, instance):
    """Return the live run state of an instance.

    @return: None in static mode, otherwise "up" if the instance info
        returned by the primary node contains a "state" entry and
        "down" if it does not

    """
    if self.op.static:
      return None

    info = self.rpc.call_instance_info(instance.primary_node,
                                       instance.name,
                                       instance.hypervisor)
    info.Raise()
    payload = info.data
    if payload and "state" in payload:
      return "up"
    return "down"

  def Exec(self, feedback_fn):
    """Gather the data of all wanted instances.

    @return: a dict mapping each instance name to a dict describing it

    """
    cluster = self.cfg.GetClusterInfo()
    collected = {}

    for inst in self.wanted_instances:
      run_state = self._GetRunState(inst)

      if inst.admin_up:
        admin_state = "up"
      else:
        admin_state = "down"

      disk_data = []
      for device in inst.disks:
        disk_data.append(self._ComputeDiskStatus(inst, None, device))

      nic_data = [(nic.mac, nic.ip, nic.bridge) for nic in inst.nics]

      collected[inst.name] = {
        "name": inst.name,
        "config_state": admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        "nics": nic_data,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        }

    return collected
5664
5665
5666 class LUSetInstanceParams(LogicalUnit):
5667   """Modifies an instances's parameters.
5668
5669   """
5670   HPATH = "instance-modify"
5671   HTYPE = constants.HTYPE_INSTANCE
5672   _OP_REQP = ["instance_name"]
5673   REQ_BGL = False
5674
5675   def CheckArguments(self):
5676     if not hasattr(self.op, 'nics'):
5677       self.op.nics = []
5678     if not hasattr(self.op, 'disks'):
5679       self.op.disks = []
5680     if not hasattr(self.op, 'beparams'):
5681       self.op.beparams = {}
5682     if not hasattr(self.op, 'hvparams'):
5683       self.op.hvparams = {}
5684     self.op.force = getattr(self.op, "force", False)
5685     if not (self.op.nics or self.op.disks or
5686             self.op.hvparams or self.op.beparams):
5687       raise errors.OpPrereqError("No changes submitted")
5688
5689     # Disk validation
5690     disk_addremove = 0
5691     for disk_op, disk_dict in self.op.disks:
5692       if disk_op == constants.DDM_REMOVE:
5693         disk_addremove += 1
5694         continue
5695       elif disk_op == constants.DDM_ADD:
5696         disk_addremove += 1
5697       else:
5698         if not isinstance(disk_op, int):
5699           raise errors.OpPrereqError("Invalid disk index")
5700       if disk_op == constants.DDM_ADD:
5701         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5702         if mode not in constants.DISK_ACCESS_SET:
5703           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5704         size = disk_dict.get('size', None)
5705         if size is None:
5706           raise errors.OpPrereqError("Required disk parameter size missing")
5707         try:
5708           size = int(size)
5709         except ValueError, err:
5710           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5711                                      str(err))
5712         disk_dict['size'] = size
5713       else:
5714         # modification of disk
5715         if 'size' in disk_dict:
5716           raise errors.OpPrereqError("Disk size change not possible, use"
5717                                      " grow-disk")
5718
5719     if disk_addremove > 1:
5720       raise errors.OpPrereqError("Only one disk add or remove operation"
5721                                  " supported at a time")
5722
5723     # NIC validation
5724     nic_addremove = 0
5725     for nic_op, nic_dict in self.op.nics:
5726       if nic_op == constants.DDM_REMOVE:
5727         nic_addremove += 1
5728         continue
5729       elif nic_op == constants.DDM_ADD:
5730         nic_addremove += 1
5731       else:
5732         if not isinstance(nic_op, int):
5733           raise errors.OpPrereqError("Invalid nic index")
5734
5735       # nic_dict should be a dict
5736       nic_ip = nic_dict.get('ip', None)
5737       if nic_ip is not None:
5738         if nic_ip.lower() == constants.VALUE_NONE:
5739           nic_dict['ip'] = None
5740         else:
5741           if not utils.IsValidIP(nic_ip):
5742             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5743
5744       if nic_op == constants.DDM_ADD:
5745         nic_bridge = nic_dict.get('bridge', None)
5746         if nic_bridge is None:
5747           nic_dict['bridge'] = self.cfg.GetDefBridge()
5748         nic_mac = nic_dict.get('mac', None)
5749         if nic_mac is None:
5750           nic_dict['mac'] = constants.VALUE_AUTO
5751
5752       if 'mac' in nic_dict:
5753         nic_mac = nic_dict['mac']
5754         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5755           if not utils.IsValidMac(nic_mac):
5756             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5757         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5758           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5759                                      " modifying an existing nic")
5760
5761     if nic_addremove > 1:
5762       raise errors.OpPrereqError("Only one NIC add or remove operation"
5763                                  " supported at a time")
5764
5765   def ExpandNames(self):
5766     self._ExpandAndLockInstance()
5767     self.needed_locks[locking.LEVEL_NODE] = []
5768     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5769
5770   def DeclareLocks(self, level):
5771     if level == locking.LEVEL_NODE:
5772       self._LockInstancesNodes()
5773
5774   def BuildHooksEnv(self):
5775     """Build hooks env.
5776
5777     This runs on the master, primary and secondaries.
5778
5779     """
5780     args = dict()
5781     if constants.BE_MEMORY in self.be_new:
5782       args['memory'] = self.be_new[constants.BE_MEMORY]
5783     if constants.BE_VCPUS in self.be_new:
5784       args['vcpus'] = self.be_new[constants.BE_VCPUS]
5785     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5786     # information at all.
5787     if self.op.nics:
5788       args['nics'] = []
5789       nic_override = dict(self.op.nics)
5790       for idx, nic in enumerate(self.instance.nics):
5791         if idx in nic_override:
5792           this_nic_override = nic_override[idx]
5793         else:
5794           this_nic_override = {}
5795         if 'ip' in this_nic_override:
5796           ip = this_nic_override['ip']
5797         else:
5798           ip = nic.ip
5799         if 'bridge' in this_nic_override:
5800           bridge = this_nic_override['bridge']
5801         else:
5802           bridge = nic.bridge
5803         if 'mac' in this_nic_override:
5804           mac = this_nic_override['mac']
5805         else:
5806           mac = nic.mac
5807         args['nics'].append((ip, bridge, mac))
5808       if constants.DDM_ADD in nic_override:
5809         ip = nic_override[constants.DDM_ADD].get('ip', None)
5810         bridge = nic_override[constants.DDM_ADD]['bridge']
5811         mac = nic_override[constants.DDM_ADD]['mac']
5812         args['nics'].append((ip, bridge, mac))
5813       elif constants.DDM_REMOVE in nic_override:
5814         del args['nics'][-1]
5815
5816     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
5817     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5818     return env, nl, nl
5819
5820   def CheckPrereq(self):
5821     """Check prerequisites.
5822
5823     This only checks the instance list against the existing names.
5824
5825     """
5826     force = self.force = self.op.force
5827
5828     # checking the new params on the primary/secondary nodes
5829
5830     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5831     assert self.instance is not None, \
5832       "Cannot retrieve locked instance %s" % self.op.instance_name
5833     pnode = instance.primary_node
5834     nodelist = list(instance.all_nodes)
5835
5836     # hvparams processing
5837     if self.op.hvparams:
5838       i_hvdict = copy.deepcopy(instance.hvparams)
5839       for key, val in self.op.hvparams.iteritems():
5840         if val == constants.VALUE_DEFAULT:
5841           try:
5842             del i_hvdict[key]
5843           except KeyError:
5844             pass
5845         else:
5846           i_hvdict[key] = val
5847       cluster = self.cfg.GetClusterInfo()
5848       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
5849       hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
5850                                 i_hvdict)
5851       # local check
5852       hypervisor.GetHypervisor(
5853         instance.hypervisor).CheckParameterSyntax(hv_new)
5854       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
5855       self.hv_new = hv_new # the new actual values
5856       self.hv_inst = i_hvdict # the new dict (without defaults)
5857     else:
5858       self.hv_new = self.hv_inst = {}
5859
5860     # beparams processing
5861     if self.op.beparams:
5862       i_bedict = copy.deepcopy(instance.beparams)
5863       for key, val in self.op.beparams.iteritems():
5864         if val == constants.VALUE_DEFAULT:
5865           try:
5866             del i_bedict[key]
5867           except KeyError:
5868             pass
5869         else:
5870           i_bedict[key] = val
5871       cluster = self.cfg.GetClusterInfo()
5872       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
5873       be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
5874                                 i_bedict)
5875       self.be_new = be_new # the new actual values
5876       self.be_inst = i_bedict # the new dict (without defaults)
5877     else:
5878       self.be_new = self.be_inst = {}
5879
5880     self.warn = []
5881
5882     if constants.BE_MEMORY in self.op.beparams and not self.force:
5883       mem_check_list = [pnode]
5884       if be_new[constants.BE_AUTO_BALANCE]:
5885         # either we changed auto_balance to yes or it was from before
5886         mem_check_list.extend(instance.secondary_nodes)
5887       instance_info = self.rpc.call_instance_info(pnode, instance.name,
5888                                                   instance.hypervisor)
5889       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
5890                                          instance.hypervisor)
5891       if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
5892         # Assume the primary node is unreachable and go ahead
5893         self.warn.append("Can't get info from primary node %s" % pnode)
5894       else:
5895         if not instance_info.failed and instance_info.data:
5896           current_mem = instance_info.data['memory']
5897         else:
5898           # Assume instance not running
5899           # (there is a slight race condition here, but it's not very probable,
5900           # and we have no other way to check)
5901           current_mem = 0
5902         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
5903                     nodeinfo[pnode].data['memory_free'])
5904         if miss_mem > 0:
5905           raise errors.OpPrereqError("This change will prevent the instance"
5906                                      " from starting, due to %d MB of memory"
5907                                      " missing on its primary node" % miss_mem)
5908
5909       if be_new[constants.BE_AUTO_BALANCE]:
5910         for node, nres in nodeinfo.iteritems():
5911           if node not in instance.secondary_nodes:
5912             continue
5913           if nres.failed or not isinstance(nres.data, dict):
5914             self.warn.append("Can't get info from secondary node %s" % node)
5915           elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
5916             self.warn.append("Not enough memory to failover instance to"
5917                              " secondary node %s" % node)
5918
5919     # NIC processing
5920     for nic_op, nic_dict in self.op.nics:
5921       if nic_op == constants.DDM_REMOVE:
5922         if not instance.nics:
5923           raise errors.OpPrereqError("Instance has no NICs, cannot remove")
5924         continue
5925       if nic_op != constants.DDM_ADD:
5926         # an existing nic
5927         if nic_op < 0 or nic_op >= len(instance.nics):
5928           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
5929                                      " are 0 to %d" %
5930                                      (nic_op, len(instance.nics)))
5931       if 'bridge' in nic_dict:
5932         nic_bridge = nic_dict['bridge']
5933         if nic_bridge is None:
5934           raise errors.OpPrereqError('Cannot set the nic bridge to None')
5935         if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
5936           msg = ("Bridge '%s' doesn't exist on one of"
5937                  " the instance nodes" % nic_bridge)
5938           if self.force:
5939             self.warn.append(msg)
5940           else:
5941             raise errors.OpPrereqError(msg)
5942       if 'mac' in nic_dict:
5943         nic_mac = nic_dict['mac']
5944         if nic_mac is None:
5945           raise errors.OpPrereqError('Cannot set the nic mac to None')
5946         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5947           # otherwise generate the mac
5948           nic_dict['mac'] = self.cfg.GenerateMAC()
5949         else:
5950           # or validate/reserve the current one
5951           if self.cfg.IsMacInUse(nic_mac):
5952             raise errors.OpPrereqError("MAC address %s already in use"
5953                                        " in cluster" % nic_mac)
5954
5955     # DISK processing
5956     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
5957       raise errors.OpPrereqError("Disk operations not supported for"
5958                                  " diskless instances")
5959     for disk_op, disk_dict in self.op.disks:
5960       if disk_op == constants.DDM_REMOVE:
5961         if len(instance.disks) == 1:
5962           raise errors.OpPrereqError("Cannot remove the last disk of"
5963                                      " an instance")
5964         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
5965         ins_l = ins_l[pnode]
5966         if ins_l.failed or not isinstance(ins_l.data, list):
5967           raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
5968         if instance.name in ins_l.data:
5969           raise errors.OpPrereqError("Instance is running, can't remove"
5970                                      " disks.")
5971
5972       if (disk_op == constants.DDM_ADD and
5973           len(instance.nics) >= constants.MAX_DISKS):
5974         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
5975                                    " add more" % constants.MAX_DISKS)
5976       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
5977         # an existing disk
5978         if disk_op < 0 or disk_op >= len(instance.disks):
5979           raise errors.OpPrereqError("Invalid disk index %s, valid values"
5980                                      " are 0 to %d" %
5981                                      (disk_op, len(instance.disks)))
5982
5983     return
5984
5985   def Exec(self, feedback_fn):
5986     """Modifies an instance.
5987
5988     All parameters take effect only at the next restart of the instance.
5989
5990     """
5991     # Process here the warnings from CheckPrereq, as we don't have a
5992     # feedback_fn there.
5993     for warn in self.warn:
5994       feedback_fn("WARNING: %s" % warn)
5995
5996     result = []
5997     instance = self.instance
5998     # disk changes
5999     for disk_op, disk_dict in self.op.disks:
6000       if disk_op == constants.DDM_REMOVE:
6001         # remove the last disk
6002         device = instance.disks.pop()
6003         device_idx = len(instance.disks)
6004         for node, disk in device.ComputeNodeTree(instance.primary_node):
6005           self.cfg.SetDiskID(disk, node)
6006           msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6007           if msg:
6008             self.LogWarning("Could not remove disk/%d on node %s: %s,"
6009                             " continuing anyway", device_idx, node, msg)
6010         result.append(("disk/%d" % device_idx, "remove"))
6011       elif disk_op == constants.DDM_ADD:
6012         # add a new disk
6013         if instance.disk_template == constants.DT_FILE:
6014           file_driver, file_path = instance.disks[0].logical_id
6015           file_path = os.path.dirname(file_path)
6016         else:
6017           file_driver = file_path = None
6018         disk_idx_base = len(instance.disks)
6019         new_disk = _GenerateDiskTemplate(self,
6020                                          instance.disk_template,
6021                                          instance.name, instance.primary_node,
6022                                          instance.secondary_nodes,
6023                                          [disk_dict],
6024                                          file_path,
6025                                          file_driver,
6026                                          disk_idx_base)[0]
6027         instance.disks.append(new_disk)
6028         info = _GetInstanceInfoText(instance)
6029
6030         logging.info("Creating volume %s for instance %s",
6031                      new_disk.iv_name, instance.name)
6032         # Note: this needs to be kept in sync with _CreateDisks
6033         #HARDCODE
6034         for node in instance.all_nodes:
6035           f_create = node == instance.primary_node
6036           try:
6037             _CreateBlockDev(self, node, instance, new_disk,
6038                             f_create, info, f_create)
6039           except errors.OpExecError, err:
6040             self.LogWarning("Failed to create volume %s (%s) on"
6041                             " node %s: %s",
6042                             new_disk.iv_name, new_disk, node, err)
6043         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6044                        (new_disk.size, new_disk.mode)))
6045       else:
6046         # change a given disk
6047         instance.disks[disk_op].mode = disk_dict['mode']
6048         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6049     # NIC changes
6050     for nic_op, nic_dict in self.op.nics:
6051       if nic_op == constants.DDM_REMOVE:
6052         # remove the last nic
6053         del instance.nics[-1]
6054         result.append(("nic.%d" % len(instance.nics), "remove"))
6055       elif nic_op == constants.DDM_ADD:
6056         # mac and bridge should be set, by now
6057         mac = nic_dict['mac']
6058         bridge = nic_dict['bridge']
6059         new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
6060                               bridge=bridge)
6061         instance.nics.append(new_nic)
6062         result.append(("nic.%d" % (len(instance.nics) - 1),
6063                        "add:mac=%s,ip=%s,bridge=%s" %
6064                        (new_nic.mac, new_nic.ip, new_nic.bridge)))
6065       else:
6066         # change a given nic
6067         for key in 'mac', 'ip', 'bridge':
6068           if key in nic_dict:
6069             setattr(instance.nics[nic_op], key, nic_dict[key])
6070             result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
6071
6072     # hvparams changes
6073     if self.op.hvparams:
6074       instance.hvparams = self.hv_inst
6075       for key, val in self.op.hvparams.iteritems():
6076         result.append(("hv/%s" % key, val))
6077
6078     # beparams changes
6079     if self.op.beparams:
6080       instance.beparams = self.be_inst
6081       for key, val in self.op.beparams.iteritems():
6082         result.append(("be/%s" % key, val))
6083
6084     self.cfg.Update(instance)
6085
6086     return result
6087
6088
class LUQueryExports(NoHooksLU):
  """List the exports present on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the node locks: the given nodes, or all nodes if none."""
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Remember the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is the data returned by the node's export
        list call, or False if the call to that node failed.

    """
    answers = self.rpc.call_export_list(self.nodes)
    per_node = {}
    for node_name in answers:
      answer = answers[node_name]
      if not answer.failed:
        per_node[node_name] = answer.data
      else:
        per_node[node_name] = False

    return per_node
6129
6130
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The instance's disks are snapshotted on its primary node, the
  snapshots are exported to the target node and then removed, and
  finally older exports of the same instance found on other nodes are
  deleted.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and all nodes."""
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary and the target node pass the online checks, that the target
    node passes the not-drained check and that none of the instance's
    disks is file-based.

    @raise errors.OpPrereqError: if the target node is unknown or the
        instance has file-based disks

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    Failures to snapshot, export or clean up individual disks are only
    logged as warnings; a failed shutdown or restart of the instance
    aborts the operation.

    @raise errors.OpExecError: if the instance cannot be shut down or
        cannot be started again after the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, src_node, msg))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot Disk object, or False if
    # the snapshot could not be taken
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    try:
      for disk in instance.disks:
        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
        if new_dev_name.failed or not new_dev_name.data:
          self.LogWarning("Could not snapshot block device %s on node %s",
                          disk.logical_id[1], src_node)
          snap_disks.append(False)
        else:
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=(vgname, new_dev_name.data),
                                 physical_id=(vgname, new_dev_name.data),
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      # restart the instance even if snapshotting raised, but only if we
      # stopped it ourselves and it is configured to be up
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance)
        msg = result.RemoteFailMsg()
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        if result.failed or not result.data:
          self.LogWarning("Could not export block device %s from node %s to"
                          " node %s", dev.logical_id[1], src_node,
                          dst_node.name)
        # the snapshot is removed whether or not the export succeeded
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
        if msg:
          self.LogWarning("Could not remove snapshot block device %s from node"
                          " %s: %s", dev.logical_id[1], src_node, msg)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    if result.failed or not result.data:
      self.LogWarning("Could not finalize export for instance %s on node %s",
                      instance.name, dst_node.name)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].failed:
          continue
        if instance.name in exportlist[node].data:
          # check the RPC result explicitly, the same way LURemoveExport
          # does for this call, instead of relying on its truth value
          result = self.rpc.call_export_remove(node, instance.name)
          if result.failed or not result.data:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s", instance.name, node)
6282
6283
class LURemoveExport(NoHooksLU):
  """Delete the exports of the named instance from all nodes.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes.

    All nodes must be locked for the removal to work; the instance
    itself is not locked, as nothing will happen to it (and exports can
    also be removed for an instance that no longer exists).

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites; there are none for this LU.
    """

  def Exec(self, feedback_fn):
    """Remove the instance's export from every node that has one.

    Nodes that cannot be queried are skipped with a warning and failed
    removals are logged as errors.

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # An unknown instance may still have exports lying around; in that
    # case fall back to the name exactly as given, which can only match
    # if it was a FQDN.
    unexpanded = not export_name
    if unexpanded:
      export_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    listing = self.rpc.call_export_list(locked_nodes)

    removed_any = False
    for node_name in listing:
      node_answer = listing[node_name]
      if node_answer.failed:
        self.LogWarning("Failed to query node %s, continuing" % node_name)
        continue
      if export_name not in node_answer.data:
        continue
      removed_any = True
      removal = self.rpc.call_export_remove(node_name, export_name)
      if removal.failed or not removal.data:
        logging.error("Could not remove export for instance %s"
                      " on node %s", export_name, node_name)

    if unexpanded and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
6333
6334
class TagsLU(NoHooksLU):
  """Common base of the tag-handling LUs.

  This class is abstract: it only locks and resolves the tagged object
  (cluster, node or instance); the actual work is left to its subclasses.

  """

  def ExpandNames(self):
    """Canonicalise the object name and lock the tagged object.

    For nodes and instances the name in the opcode is replaced by its
    expanded form and a lock on that object is requested. An unknown name
    raises errors.OpPrereqError. Any other kind takes no lock.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      expand_fn = self.cfg.ExpandNodeName
      lock_level = locking.LEVEL_NODE
      bad_name_msg = "Invalid node name (%s)"
    elif kind == constants.TAG_INSTANCE:
      expand_fn = self.cfg.ExpandInstanceName
      lock_level = locking.LEVEL_INSTANCE
      bad_name_msg = "Invalid instance name (%s)"
    else:
      # nothing to expand or lock here; kinds that are not supported at all
      # are rejected later on, in CheckPrereq
      return

    full_name = expand_fn(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError(bad_name_msg % (self.op.name,))
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the configuration object the tags belong to and stores it in
    self.target; an unsupported kind raises errors.OpPrereqError.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind))
    self.target = target
6372
6373
class LUGetTags(TagsLU):
  """Query the tags attached to a single object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
6386
6387
6388 class LUSearchTags(NoHooksLU):
6389   """Searches the tags for a given pattern.
6390
6391   """
6392   _OP_REQP = ["pattern"]
6393   REQ_BGL = False
6394
6395   def ExpandNames(self):
6396     self.needed_locks = {}
6397
6398   def CheckPrereq(self):
6399     """Check prerequisites.
6400
6401     This checks the pattern passed for validity by compiling it.
6402
6403     """
6404     try:
6405       self.re = re.compile(self.op.pattern)
6406     except re.error, err:
6407       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6408                                  (self.op.pattern, err))
6409
6410   def Exec(self, feedback_fn):
6411     """Returns the tag list.
6412
6413     """
6414     cfg = self.cfg
6415     tgts = [("/cluster", cfg.GetClusterInfo())]
6416     ilist = cfg.GetAllInstancesInfo().values()
6417     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6418     nlist = cfg.GetAllNodesInfo().values()
6419     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6420     results = []
6421     for path, target in tgts:
6422       for tag in target.GetTags():
6423         if self.re.search(tag):
6424           results.append((path, tag))
6425     return results
6426
6427
6428 class LUAddTags(TagsLU):
6429   """Sets a tag on a given object.
6430
6431   """
6432   _OP_REQP = ["kind", "name", "tags"]
6433   REQ_BGL = False
6434
6435   def CheckPrereq(self):
6436     """Check prerequisites.
6437
6438     This checks the type and length of the tag name and value.
6439
6440     """
6441     TagsLU.CheckPrereq(self)
6442     for tag in self.op.tags:
6443       objects.TaggableObject.ValidateTag(tag)
6444
6445   def Exec(self, feedback_fn):
6446     """Sets the tag.
6447
6448     """
6449     try:
6450       for tag in self.op.tags:
6451         self.target.AddTag(tag)
6452     except errors.TagError, err:
6453       raise errors.OpExecError("Error while setting tag: %s" % str(err))
6454     try:
6455       self.cfg.Update(self.target)
6456     except errors.ConfigurationError:
6457       raise errors.OpRetryError("There has been a modification to the"
6458                                 " config file and the operation has been"
6459                                 " aborted. Please retry.")
6460
6461
class LUDelTags(TagsLU):
  """Remove a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    All tags must be valid and must currently be set on the target
    object; the ones that are not set are listed in the
    errors.OpPrereqError raised.

    """
    TagsLU.CheckPrereq(self)
    validate_fn = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate_fn(old_tag)

    # whatever is left after taking out the current tags cannot be deleted
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if missing:
      quoted = sorted(["'%s'" % old_tag for old_tag in missing])
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)))

  def Exec(self, feedback_fn):
    """Drop the tags from the target and save it to the configuration.

    A configuration error during the update is reported as
    errors.OpRetryError.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    try:
      self.cfg.Update(target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")
6499
6500
class LUTestDelay(NoHooksLU):
  """Wait for a given amount of time.

  The delay can be run on the master, on a list of nodes, or on both.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, when given, is expanded and the nodes in it are
    locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Run the delay on the master and/or on the requested nodes.

    Any failure, local or remote, is reported as errors.OpExecError.

    """
    op = self.op
    if op.on_master and not utils.TestDelay(op.duration):
      raise errors.OpExecError("Error during master delay test")

    if not op.on_nodes:
      return

    per_node = self.rpc.call_test_delay(op.on_nodes, op.duration)
    if not per_node:
      raise errors.OpExecError("Complete failure from rpc call")
    for node, node_result in per_node.items():
      node_result.Raise()
      if node_result.data:
        continue
      raise errors.OpExecError("Failure during rpc call to node %s,"
                               " result: %s" % (node, node_result.data))
6546
6547
6548 class IAllocator(object):
6549   """IAllocator framework.
6550
6551   An IAllocator instance has three sets of attributes:
6552     - cfg that is needed to query the cluster
6553     - input data (all members of the _KEYS class attribute are required)
6554     - four buffer attributes (in|out_data|text), that represent the
6555       input (to the external script) in text and data structure format,
6556       and the output from it, again in two formats
6557     - the result variables from the script (success, info, nodes) for
6558       easy usage
6559
6560   """
6561   _ALLO_KEYS = [
6562     "mem_size", "disks", "disk_template",
6563     "os", "tags", "nics", "vcpus", "hypervisor",
6564     ]
6565   _RELO_KEYS = [
6566     "relocate_from",
6567     ]
6568
6569   def __init__(self, lu, mode, name, **kwargs):
6570     self.lu = lu
6571     # init buffer variables
6572     self.in_text = self.out_text = self.in_data = self.out_data = None
6573     # init all input fields so that pylint is happy
6574     self.mode = mode
6575     self.name = name
6576     self.mem_size = self.disks = self.disk_template = None
6577     self.os = self.tags = self.nics = self.vcpus = None
6578     self.hypervisor = None
6579     self.relocate_from = None
6580     # computed fields
6581     self.required_nodes = None
6582     # init result fields
6583     self.success = self.info = self.nodes = None
6584     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6585       keyset = self._ALLO_KEYS
6586     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6587       keyset = self._RELO_KEYS
6588     else:
6589       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6590                                    " IAllocator" % self.mode)
6591     for key in kwargs:
6592       if key not in keyset:
6593         raise errors.ProgrammerError("Invalid input parameter '%s' to"
6594                                      " IAllocator" % key)
6595       setattr(self, key, kwargs[key])
6596     for key in keyset:
6597       if key not in kwargs:
6598         raise errors.ProgrammerError("Missing input parameter '%s' to"
6599                                      " IAllocator" % key)
6600     self._BuildInputData()
6601
6602   def _ComputeClusterData(self):
6603     """Compute the generic allocator input data.
6604
6605     This is the data that is independent of the actual operation.
6606
6607     """
6608     cfg = self.lu.cfg
6609     cluster_info = cfg.GetClusterInfo()
6610     # cluster data
6611     data = {
6612       "version": constants.IALLOCATOR_VERSION,
6613       "cluster_name": cfg.GetClusterName(),
6614       "cluster_tags": list(cluster_info.GetTags()),
6615       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
6616       # we don't have job IDs
6617       }
6618     iinfo = cfg.GetAllInstancesInfo().values()
6619     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
6620
6621     # node data
6622     node_results = {}
6623     node_list = cfg.GetNodeList()
6624
6625     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6626       hypervisor_name = self.hypervisor
6627     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6628       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
6629
6630     node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
6631                                            hypervisor_name)
6632     node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
6633                        cluster_info.enabled_hypervisors)
6634     for nname, nresult in node_data.items():
6635       # first fill in static (config-based) values
6636       ninfo = cfg.GetNodeInfo(nname)
6637       pnr = {
6638         "tags": list(ninfo.GetTags()),
6639         "primary_ip": ninfo.primary_ip,
6640         "secondary_ip": ninfo.secondary_ip,
6641         "offline": ninfo.offline,
6642         "drained": ninfo.drained,
6643         "master_candidate": ninfo.master_candidate,
6644         }
6645
6646       if not ninfo.offline:
6647         nresult.Raise()
6648         if not isinstance(nresult.data, dict):
6649           raise errors.OpExecError("Can't get data for node %s" % nname)
6650         remote_info = nresult.data
6651         for attr in ['memory_total', 'memory_free', 'memory_dom0',
6652                      'vg_size', 'vg_free', 'cpu_total']:
6653           if attr not in remote_info:
6654             raise errors.OpExecError("Node '%s' didn't return attribute"
6655                                      " '%s'" % (nname, attr))
6656           try:
6657             remote_info[attr] = int(remote_info[attr])
6658           except ValueError, err:
6659             raise errors.OpExecError("Node '%s' returned invalid value"
6660                                      " for '%s': %s" % (nname, attr, err))
6661         # compute memory used by primary instances
6662         i_p_mem = i_p_up_mem = 0
6663         for iinfo, beinfo in i_list:
6664           if iinfo.primary_node == nname:
6665             i_p_mem += beinfo[constants.BE_MEMORY]
6666             if iinfo.name not in node_iinfo[nname].data:
6667               i_used_mem = 0
6668             else:
6669               i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
6670             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
6671             remote_info['memory_free'] -= max(0, i_mem_diff)
6672
6673             if iinfo.admin_up:
6674               i_p_up_mem += beinfo[constants.BE_MEMORY]
6675
6676         # compute memory used by instances
6677         pnr_dyn = {
6678           "total_memory": remote_info['memory_total'],
6679           "reserved_memory": remote_info['memory_dom0'],
6680           "free_memory": remote_info['memory_free'],
6681           "total_disk": remote_info['vg_size'],
6682           "free_disk": remote_info['vg_free'],
6683           "total_cpus": remote_info['cpu_total'],
6684           "i_pri_memory": i_p_mem,
6685           "i_pri_up_memory": i_p_up_mem,
6686           }
6687         pnr.update(pnr_dyn)
6688
6689       node_results[nname] = pnr
6690     data["nodes"] = node_results
6691
6692     # instance data
6693     instance_data = {}
6694     for iinfo, beinfo in i_list:
6695       nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
6696                   for n in iinfo.nics]
6697       pir = {
6698         "tags": list(iinfo.GetTags()),
6699         "admin_up": iinfo.admin_up,
6700         "vcpus": beinfo[constants.BE_VCPUS],
6701         "memory": beinfo[constants.BE_MEMORY],
6702         "os": iinfo.os,
6703         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
6704         "nics": nic_data,
6705         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
6706         "disk_template": iinfo.disk_template,
6707         "hypervisor": iinfo.hypervisor,
6708         }
6709       instance_data[iinfo.name] = pir
6710
6711     data["instances"] = instance_data
6712
6713     self.in_data = data
6714
6715   def _AddNewInstance(self):
6716     """Add new instance data to allocator structure.
6717
6718     This in combination with _AllocatorGetClusterData will create the
6719     correct structure needed as input for the allocator.
6720
6721     The checks for the completeness of the opcode must have already been
6722     done.
6723
6724     """
6725     data = self.in_data
6726
6727     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
6728
6729     if self.disk_template in constants.DTS_NET_MIRROR:
6730       self.required_nodes = 2
6731     else:
6732       self.required_nodes = 1
6733     request = {
6734       "type": "allocate",
6735       "name": self.name,
6736       "disk_template": self.disk_template,
6737       "tags": self.tags,
6738       "os": self.os,
6739       "vcpus": self.vcpus,
6740       "memory": self.mem_size,
6741       "disks": self.disks,
6742       "disk_space_total": disk_space,
6743       "nics": self.nics,
6744       "required_nodes": self.required_nodes,
6745       }
6746     data["request"] = request
6747
6748   def _AddRelocateInstance(self):
6749     """Add relocate instance data to allocator structure.
6750
6751     This in combination with _IAllocatorGetClusterData will create the
6752     correct structure needed as input for the allocator.
6753
6754     The checks for the completeness of the opcode must have already been
6755     done.
6756
6757     """
6758     instance = self.lu.cfg.GetInstanceInfo(self.name)
6759     if instance is None:
6760       raise errors.ProgrammerError("Unknown instance '%s' passed to"
6761                                    " IAllocator" % self.name)
6762
6763     if instance.disk_template not in constants.DTS_NET_MIRROR:
6764       raise errors.OpPrereqError("Can't relocate non-mirrored instances")
6765
6766     if len(instance.secondary_nodes) != 1:
6767       raise errors.OpPrereqError("Instance has not exactly one secondary node")
6768
6769     self.required_nodes = 1
6770     disk_sizes = [{'size': disk.size} for disk in instance.disks]
6771     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
6772
6773     request = {
6774       "type": "relocate",
6775       "name": self.name,
6776       "disk_space_total": disk_space,
6777       "required_nodes": self.required_nodes,
6778       "relocate_from": self.relocate_from,
6779       }
6780     self.in_data["request"] = request
6781
6782   def _BuildInputData(self):
6783     """Build input data structures.
6784
6785     """
6786     self._ComputeClusterData()
6787
6788     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6789       self._AddNewInstance()
6790     else:
6791       self._AddRelocateInstance()
6792
6793     self.in_text = serializer.Dump(self.in_data)
6794
6795   def Run(self, name, validate=True, call_fn=None):
6796     """Run an instance allocator and return the results.
6797
6798     """
6799     if call_fn is None:
6800       call_fn = self.lu.rpc.call_iallocator_runner
6801     data = self.in_text
6802
6803     result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
6804     result.Raise()
6805
6806     if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
6807       raise errors.OpExecError("Invalid result from master iallocator runner")
6808
6809     rcode, stdout, stderr, fail = result.data
6810
6811     if rcode == constants.IARUN_NOTFOUND:
6812       raise errors.OpExecError("Can't find allocator '%s'" % name)
6813     elif rcode == constants.IARUN_FAILURE:
6814       raise errors.OpExecError("Instance allocator call failed: %s,"
6815                                " output: %s" % (fail, stdout+stderr))
6816     self.out_text = stdout
6817     if validate:
6818       self._ValidateResult()
6819
6820   def _ValidateResult(self):
6821     """Process the allocator results.
6822
6823     This will process and if successful save the result in
6824     self.out_data and the other parameters.
6825
6826     """
6827     try:
6828       rdict = serializer.Load(self.out_text)
6829     except Exception, err:
6830       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
6831
6832     if not isinstance(rdict, dict):
6833       raise errors.OpExecError("Can't parse iallocator results: not a dict")
6834
6835     for key in "success", "info", "nodes":
6836       if key not in rdict:
6837         raise errors.OpExecError("Can't parse iallocator results:"
6838                                  " missing key '%s'" % key)
6839       setattr(self, key, rdict[key])
6840
6841     if not isinstance(rdict["nodes"], list):
6842       raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
6843                                " is not a list")
6844     self.out_data = rdict
6845
6846
class LUTestAllocator(NoHooksLU):
  """Exercise the instance allocator framework.

  Depending on the requested direction this LU returns either the input
  text built for an allocator, or the raw output of an actual allocator
  run.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    The opcode parameters are verified according to the mode and the
    direction of the test; any problem raises errors.OpPrereqError.

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      required = ("name", "mem_size", "disks", "disk_template",
                  "os", "tags", "nics", "vcpus")
      for attr in required:
        if hasattr(op, attr):
          continue
        raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                   attr)
      # an allocation test only makes sense for a not yet existing instance
      iname = self.cfg.ExpandInstanceName(op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for nic in op.nics:
        if not (isinstance(nic, dict) and
                "mac" in nic and
                "ip" in nic and
                "bridge" in nic):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for disk in op.disks:
        if not (isinstance(disk, dict) and
                "size" in disk and
                isinstance(disk["size"], int) and
                "mode" in disk and
                disk["mode"] in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      # fall back to the hypervisor type given by the configuration
      if not (hasattr(op, "hypervisor") and op.hypervisor is not None):
        op.hypervisor = self.cfg.GetHypervisorType()
    elif op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   op.name)
      op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 op.mode)

    direction = op.direction
    if direction == constants.IALLOCATOR_DIR_OUT:
      # running an allocator requires knowing which one
      if not (hasattr(op, "allocator") and op.allocator is not None):
        raise errors.OpPrereqError("Missing allocator name")
    elif direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    Returns the allocator input text for the "in" direction, otherwise
    the unvalidated output text of the allocator run.

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      extra_args = {
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      extra_args = {
        "relocate_from": list(self.relocate_from),
        }
    ial = IAllocator(self, mode=op.mode, name=op.name, **extra_args)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text