code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0613,W0201
  25
  26 import os
  27 import os.path
  28 import sha
  29 import time
  30 import tempfile
  31 import re
  32 import platform
  33 import logging
  34 import copy
  35 import random
  36
  37 from ganeti import ssh
  38 from ganeti import utils
  39 from ganeti import errors
  40 from ganeti import hypervisor
  41 from ganeti import locking
  42 from ganeti import constants
  43 from ganeti import objects
  44 from ganeti import opcodes
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47
  48
  49 class LogicalUnit(object):
  50   """Logical Unit base class.
  51
  52   Subclasses must follow these rules:
  53     - implement ExpandNames
  54     - implement CheckPrereq
  55     - implement Exec
  56     - implement BuildHooksEnv
  57     - redefine HPATH and HTYPE
  58     - optionally redefine their run requirements:
  59         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  60
  61   Note that all commands require root permissions.
  62
  63   """
  64   HPATH = None
  65   HTYPE = None
  66   _OP_REQP = []
  67   REQ_BGL = True
  68
  69   def __init__(self, processor, op, context, rpc):
  70     """Constructor for LogicalUnit.
  71
  72     This needs to be overriden in derived classes in order to check op
  73     validity.
  74
  75     """
  76     self.proc = processor
  77     self.op = op
  78     self.cfg = context.cfg
  79     self.context = context
  80     self.rpc = rpc
  81     # Dicts used to declare locking needs to mcpu
  82     self.needed_locks = None
  83     self.acquired_locks = {}
  84     self.share_locks = dict(((i, 0) for i in locking.LEVELS))
  85     self.add_locks = {}
  86     self.remove_locks = {}
  87     # Used to force good behavior when calling helper functions
  88     self.recalculate_locks = {}
  89     self.__ssh = None
  90     # logging
  91     self.LogWarning = processor.LogWarning
  92     self.LogInfo = processor.LogInfo
  93
  94     for attr_name in self._OP_REQP:
  95       attr_val = getattr(op, attr_name, None)
  96       if attr_val is None:
  97         raise errors.OpPrereqError("Required parameter '%s' missing" %
  98                                    attr_name)
  99     self.CheckArguments()
 100
 101   def __GetSSH(self):
 102     """Returns the SshRunner object
 103
 104     """
 105     if not self.__ssh:
 106       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 107     return self.__ssh
 108
 109   ssh = property(fget=__GetSSH)
 110
 111   def CheckArguments(self):
 112     """Check syntactic validity for the opcode arguments.
 113
 114     This method is for doing a simple syntactic check and ensure
 115     validity of opcode parameters, without any cluster-related
 116     checks. While the same can be accomplished in ExpandNames and/or
 117     CheckPrereq, doing these separate is better because:
 118
 119       - ExpandNames is left as as purely a lock-related function
 120       - CheckPrereq is run after we have aquired locks (and possible
 121         waited for them)
 122
 123     The function is allowed to change the self.op attribute so that
 124     later methods can no longer worry about missing parameters.
 125
 126     """
 127     pass
 128
 129   def ExpandNames(self):
 130     """Expand names for this LU.
 131
 132     This method is called before starting to execute the opcode, and it should
 133     update all the parameters of the opcode to their canonical form (e.g. a
 134     short node name must be fully expanded after this method has successfully
 135     completed). This way locking, hooks, logging, ecc. can work correctly.
 136
 137     LUs which implement this method must also populate the self.needed_locks
 138     member, as a dict with lock levels as keys, and a list of needed lock names
 139     as values. Rules:
 140
 141       - use an empty dict if you don't need any lock
 142       - if you don't need any lock at a particular level omit that level
 143       - don't put anything for the BGL level
 144       - if you want all locks at a level use locking.ALL_SET as a value
 145
 146     If you need to share locks (rather than acquire them exclusively) at one
 147     level you can modify self.share_locks, setting a true value (usually 1) for
 148     that level. By default locks are not shared.
 149
 150     Examples::
 151
 152       # Acquire all nodes and one instance
 153       self.needed_locks = {
 154         locking.LEVEL_NODE: locking.ALL_SET,
 155         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 156       }
 157       # Acquire just two nodes
 158       self.needed_locks = {
 159         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 160       }
 161       # Acquire no locks
 162       self.needed_locks = {} # No, you can't leave it to the default value None
 163
 164     """
 165     # The implementation of this method is mandatory only if the new LU is
 166     # concurrent, so that old LUs don't need to be changed all at the same
 167     # time.
 168     if self.REQ_BGL:
 169       self.needed_locks = {} # Exclusive LUs don't need locks.
 170     else:
 171       raise NotImplementedError
 172
 173   def DeclareLocks(self, level):
 174     """Declare LU locking needs for a level
 175
 176     While most LUs can just declare their locking needs at ExpandNames time,
 177     sometimes there's the need to calculate some locks after having acquired
 178     the ones before. This function is called just before acquiring locks at a
 179     particular level, but after acquiring the ones at lower levels, and permits
 180     such calculations. It can be used to modify self.needed_locks, and by
 181     default it does nothing.
 182
 183     This function is only called if you have something already set in
 184     self.needed_locks for the level.
 185
 186     @param level: Locking level which is going to be locked
 187     @type level: member of ganeti.locking.LEVELS
 188
 189     """
 190
 191   def CheckPrereq(self):
 192     """Check prerequisites for this LU.
 193
 194     This method should check that the prerequisites for the execution
 195     of this LU are fulfilled. It can do internode communication, but
 196     it should be idempotent - no cluster or system changes are
 197     allowed.
 198
 199     The method should raise errors.OpPrereqError in case something is
 200     not fulfilled. Its return value is ignored.
 201
 202     This method should also update all the parameters of the opcode to
 203     their canonical form if it hasn't been done by ExpandNames before.
 204
 205     """
 206     raise NotImplementedError
 207
 208   def Exec(self, feedback_fn):
 209     """Execute the LU.
 210
 211     This method should implement the actual work. It should raise
 212     errors.OpExecError for failures that are somewhat dealt with in
 213     code, or expected.
 214
 215     """
 216     raise NotImplementedError
 217
 218   def BuildHooksEnv(self):
 219     """Build hooks environment for this LU.
 220
 221     This method should return a three-node tuple consisting of: a dict
 222     containing the environment that will be used for running the
 223     specific hook for this LU, a list of node names on which the hook
 224     should run before the execution, and a list of node names on which
 225     the hook should run after the execution.
 226
 227     The keys of the dict must not have 'GANETI_' prefixed as this will
 228     be handled in the hooks runner. Also note additional keys will be
 229     added by the hooks runner. If the LU doesn't define any
 230     environment, an empty dict (and not None) should be returned.
 231
 232     No nodes should be returned as an empty list (and not None).
 233
 234     Note that if the HPATH for a LU class is None, this function will
 235     not be called.
 236
 237     """
 238     raise NotImplementedError
 239
 240   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 241     """Notify the LU about the results of its hooks.
 242
 243     This method is called every time a hooks phase is executed, and notifies
 244     the Logical Unit about the hooks' result. The LU can then use it to alter
 245     its result based on the hooks.  By default the method does nothing and the
 246     previous result is passed back unchanged but any LU can define it if it
 247     wants to use the local cluster hook-scripts somehow.
 248
 249     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 250         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 251     @param hook_results: the results of the multi-node hooks rpc call
 252     @param feedback_fn: function used send feedback back to the caller
 253     @param lu_result: the previous Exec result this LU had, or None
 254         in the PRE phase
 255     @return: the new Exec result, based on the previous result
 256         and hook results
 257
 258     """
 259     return lu_result
 260
 261   def _ExpandAndLockInstance(self):
 262     """Helper function to expand and lock an instance.
 263
 264     Many LUs that work on an instance take its name in self.op.instance_name
 265     and need to expand it and then declare the expanded name for locking. This
 266     function does it, and then updates self.op.instance_name to the expanded
 267     name. It also initializes needed_locks as a dict, if this hasn't been done
 268     before.
 269
 270     """
 271     if self.needed_locks is None:
 272       self.needed_locks = {}
 273     else:
 274       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 275         "_ExpandAndLockInstance called with instance-level locks set"
 276     expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
 277     if expanded_name is None:
 278       raise errors.OpPrereqError("Instance '%s' not known" %
 279                                   self.op.instance_name)
 280     self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
 281     self.op.instance_name = expanded_name
 282
 283   def _LockInstancesNodes(self, primary_only=False):
 284     """Helper function to declare instances' nodes for locking.
 285
 286     This function should be called after locking one or more instances to lock
 287     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 288     with all primary or secondary nodes for instances already locked and
 289     present in self.needed_locks[locking.LEVEL_INSTANCE].
 290
 291     It should be called from DeclareLocks, and for safety only works if
 292     self.recalculate_locks[locking.LEVEL_NODE] is set.
 293
 294     In the future it may grow parameters to just lock some instance's nodes, or
 295     to just lock primaries or secondary nodes, if needed.
 296
 297     If should be called in DeclareLocks in a way similar to::
 298
 299       if level == locking.LEVEL_NODE:
 300         self._LockInstancesNodes()
 301
 302     @type primary_only: boolean
 303     @param primary_only: only lock primary nodes of locked instances
 304
 305     """
 306     assert locking.LEVEL_NODE in self.recalculate_locks, \
 307       "_LockInstancesNodes helper function called with no nodes to recalculate"
 308
 309     # TODO: check if we're really been called with the instance locks held
 310
 311     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 312     # future we might want to have different behaviors depending on the value
 313     # of self.recalculate_locks[locking.LEVEL_NODE]
 314     wanted_nodes = []
 315     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 316       instance = self.context.cfg.GetInstanceInfo(instance_name)
 317       wanted_nodes.append(instance.primary_node)
 318       if not primary_only:
 319         wanted_nodes.extend(instance.secondary_nodes)
 320
 321     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 322       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 323     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 324       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 325
 326     del self.recalculate_locks[locking.LEVEL_NODE]
 327
 328
 329 class NoHooksLU(LogicalUnit):
 330   """Simple LU which runs no hooks.
 331
 332   This LU is intended as a parent for other LogicalUnits which will
 333   run no hooks, in order to reduce duplicate code.
 334
 335   """
 336   HPATH = None
 337   HTYPE = None
 338
 339
 340 def _GetWantedNodes(lu, nodes):
 341   """Returns list of checked and expanded node names.
 342
 343   @type lu: L{LogicalUnit}
 344   @param lu: the logical unit on whose behalf we execute
 345   @type nodes: list
 346   @param nodes: list of node names or None for all nodes
 347   @rtype: list
 348   @return: the list of nodes, sorted
 349   @raise errors.OpProgrammerError: if the nodes parameter is wrong type
 350
 351   """
 352   if not isinstance(nodes, list):
 353     raise errors.OpPrereqError("Invalid argument type 'nodes'")
 354
 355   if not nodes:
 356     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 357       " non-empty list of nodes whose name is to be expanded.")
 358
 359   wanted = []
 360   for name in nodes:
 361     node = lu.cfg.ExpandNodeName(name)
 362     if node is None:
 363       raise errors.OpPrereqError("No such node name '%s'" % name)
 364     wanted.append(node)
 365
 366   return utils.NiceSort(wanted)
 367
 368
 369 def _GetWantedInstances(lu, instances):
 370   """Returns list of checked and expanded instance names.
 371
 372   @type lu: L{LogicalUnit}
 373   @param lu: the logical unit on whose behalf we execute
 374   @type instances: list
 375   @param instances: list of instance names or None for all instances
 376   @rtype: list
 377   @return: the list of instances, sorted
 378   @raise errors.OpPrereqError: if the instances parameter is wrong type
 379   @raise errors.OpPrereqError: if any of the passed instances is not found
 380
 381   """
 382   if not isinstance(instances, list):
 383     raise errors.OpPrereqError("Invalid argument type 'instances'")
 384
 385   if instances:
 386     wanted = []
 387
 388     for name in instances:
 389       instance = lu.cfg.ExpandInstanceName(name)
 390       if instance is None:
 391         raise errors.OpPrereqError("No such instance name '%s'" % name)
 392       wanted.append(instance)
 393
 394   else:
 395     wanted = lu.cfg.GetInstanceList()
 396   return utils.NiceSort(wanted)
 397
 398
 399 def _CheckOutputFields(static, dynamic, selected):
 400   """Checks whether all selected fields are valid.
 401
 402   @type static: L{utils.FieldSet}
 403   @param static: static fields set
 404   @type dynamic: L{utils.FieldSet}
 405   @param dynamic: dynamic fields set
 406
 407   """
 408   f = utils.FieldSet()
 409   f.Extend(static)
 410   f.Extend(dynamic)
 411
 412   delta = f.NonMatching(selected)
 413   if delta:
 414     raise errors.OpPrereqError("Unknown output fields selected: %s"
 415                                % ",".join(delta))
 416
 417
 418 def _CheckBooleanOpField(op, name):
 419   """Validates boolean opcode parameters.
 420
 421   This will ensure that an opcode parameter is either a boolean value,
 422   or None (but that it always exists).
 423
 424   """
 425   val = getattr(op, name, None)
 426   if not (val is None or isinstance(val, bool)):
 427     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 428                                (name, str(val)))
 429   setattr(op, name, val)
 430
 431
 432 def _CheckNodeOnline(lu, node):
 433   """Ensure that a given node is online.
 434
 435   @param lu: the LU on behalf of which we make the check
 436   @param node: the node to check
 437   @raise errors.OpPrereqError: if the nodes is offline
 438
 439   """
 440   if lu.cfg.GetNodeInfo(node).offline:
 441     raise errors.OpPrereqError("Can't use offline node %s" % node)
 442
 443
 444 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 445                           memory, vcpus, nics):
 446   """Builds instance related env variables for hooks
 447
 448   This builds the hook environment from individual variables.
 449
 450   @type name: string
 451   @param name: the name of the instance
 452   @type primary_node: string
 453   @param primary_node: the name of the instance's primary node
 454   @type secondary_nodes: list
 455   @param secondary_nodes: list of secondary nodes as strings
 456   @type os_type: string
 457   @param os_type: the name of the instance's OS
 458   @type status: string
 459   @param status: the desired status of the instances
 460   @type memory: string
 461   @param memory: the memory size of the instance
 462   @type vcpus: string
 463   @param vcpus: the count of VCPUs the instance has
 464   @type nics: list
 465   @param nics: list of tuples (ip, bridge, mac) representing
 466       the NICs the instance  has
 467   @rtype: dict
 468   @return: the hook environment for this instance
 469
 470   """
 471   env = {
 472     "OP_TARGET": name,
 473     "INSTANCE_NAME": name,
 474     "INSTANCE_PRIMARY": primary_node,
 475     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 476     "INSTANCE_OS_TYPE": os_type,
 477     "INSTANCE_STATUS": status,
 478     "INSTANCE_MEMORY": memory,
 479     "INSTANCE_VCPUS": vcpus,
 480   }
 481
 482   if nics:
 483     nic_count = len(nics)
 484     for idx, (ip, bridge, mac) in enumerate(nics):
 485       if ip is None:
 486         ip = ""
 487       env["INSTANCE_NIC%d_IP" % idx] = ip
 488       env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
 489       env["INSTANCE_NIC%d_HWADDR" % idx] = mac
 490   else:
 491     nic_count = 0
 492
 493   env["INSTANCE_NIC_COUNT"] = nic_count
 494
 495   return env
 496
 497
 498 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 499   """Builds instance related env variables for hooks from an object.
 500
 501   @type lu: L{LogicalUnit}
 502   @param lu: the logical unit on whose behalf we execute
 503   @type instance: L{objects.Instance}
 504   @param instance: the instance for which we should build the
 505       environment
 506   @type override: dict
 507   @param override: dictionary with key/values that will override
 508       our values
 509   @rtype: dict
 510   @return: the hook environment dictionary
 511
 512   """
 513   bep = lu.cfg.GetClusterInfo().FillBE(instance)
 514   args = {
 515     'name': instance.name,
 516     'primary_node': instance.primary_node,
 517     'secondary_nodes': instance.secondary_nodes,
 518     'os_type': instance.os,
 519     'status': instance.os,
 520     'memory': bep[constants.BE_MEMORY],
 521     'vcpus': bep[constants.BE_VCPUS],
 522     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
 523   }
 524   if override:
 525     args.update(override)
 526   return _BuildInstanceHookEnv(**args)
 527
 528
 529 def _AdjustCandidatePool(lu):
 530   """Adjust the candidate pool after node operations.
 531
 532   """
 533   mod_list = lu.cfg.MaintainCandidatePool()
 534   if mod_list:
 535     lu.LogInfo("Promoted nodes to master candidate role: %s",
 536                ", ".join(node.name for node in mod_list))
 537     for name in mod_list:
 538       lu.context.ReaddNode(name)
 539   mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
 540   if mc_now > mc_max:
 541     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 542                (mc_now, mc_max))
 543
 544
 545 def _CheckInstanceBridgesExist(lu, instance):
 546   """Check that the brigdes needed by an instance exist.
 547
 548   """
 549   # check bridges existance
 550   brlist = [nic.bridge for nic in instance.nics]
 551   result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
 552   result.Raise()
 553   if not result.data:
 554     raise errors.OpPrereqError("One or more target bridges %s does not"
 555                                " exist on destination node '%s'" %
 556                                (brlist, instance.primary_node))
 557
 558
 559 class LUDestroyCluster(NoHooksLU):
 560   """Logical unit for destroying the cluster.
 561
 562   """
 563   _OP_REQP = []
 564
 565   def CheckPrereq(self):
 566     """Check prerequisites.
 567
 568     This checks whether the cluster is empty.
 569
 570     Any errors are signalled by raising errors.OpPrereqError.
 571
 572     """
 573     master = self.cfg.GetMasterNode()
 574
 575     nodelist = self.cfg.GetNodeList()
 576     if len(nodelist) != 1 or nodelist[0] != master:
 577       raise errors.OpPrereqError("There are still %d node(s) in"
 578                                  " this cluster." % (len(nodelist) - 1))
 579     instancelist = self.cfg.GetInstanceList()
 580     if instancelist:
 581       raise errors.OpPrereqError("There are still %d instance(s) in"
 582                                  " this cluster." % len(instancelist))
 583
 584   def Exec(self, feedback_fn):
 585     """Destroys the cluster.
 586
 587     """
 588     master = self.cfg.GetMasterNode()
 589     result = self.rpc.call_node_stop_master(master, False)
 590     result.Raise()
 591     if not result.data:
 592       raise errors.OpExecError("Could not disable the master role")
 593     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 594     utils.CreateBackup(priv_key)
 595     utils.CreateBackup(pub_key)
 596     return master
 597
 598
 599 class LUVerifyCluster(LogicalUnit):
 600   """Verifies the cluster status.
 601
 602   """
 603   HPATH = "cluster-verify"
 604   HTYPE = constants.HTYPE_CLUSTER
 605   _OP_REQP = ["skip_checks"]
 606   REQ_BGL = False
 607
 608   def ExpandNames(self):
 609     self.needed_locks = {
 610       locking.LEVEL_NODE: locking.ALL_SET,
 611       locking.LEVEL_INSTANCE: locking.ALL_SET,
 612     }
 613     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 614
 615   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
 616                   node_result, feedback_fn, master_files):
 617     """Run multiple tests against a node.
 618
 619     Test list:
 620
 621       - compares ganeti version
 622       - checks vg existance and size > 20G
 623       - checks config file checksum
 624       - checks ssh to other nodes
 625
 626     @type nodeinfo: L{objects.Node}
 627     @param nodeinfo: the node to check
 628     @param file_list: required list of files
 629     @param local_cksum: dictionary of local files and their checksums
 630     @param node_result: the results from the node
 631     @param feedback_fn: function used to accumulate results
 632     @param master_files: list of files that only masters should have
 633
 634     """
 635     node = nodeinfo.name
 636
 637     # main result, node_result should be a non-empty dict
 638     if not node_result or not isinstance(node_result, dict):
 639       feedback_fn("  - ERROR: unable to verify node %s." % (node,))
 640       return True
 641
 642     # compares ganeti version
 643     local_version = constants.PROTOCOL_VERSION
 644     remote_version = node_result.get('version', None)
 645     if not remote_version:
 646       feedback_fn("  - ERROR: connection to %s failed" % (node))
 647       return True
 648
 649     if local_version != remote_version:
 650       feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
 651                       (local_version, node, remote_version))
 652       return True
 653
 654     # checks vg existance and size > 20G
 655
 656     bad = False
 657     vglist = node_result.get(constants.NV_VGLIST, None)
 658     if not vglist:
 659       feedback_fn("  - ERROR: unable to check volume groups on node %s." %
 660                       (node,))
 661       bad = True
 662     else:
 663       vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
 664                                             constants.MIN_VG_SIZE)
 665       if vgstatus:
 666         feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
 667         bad = True
 668
 669     # checks config file checksum
 670
 671     remote_cksum = node_result.get(constants.NV_FILELIST, None)
 672     if not isinstance(remote_cksum, dict):
 673       bad = True
 674       feedback_fn("  - ERROR: node hasn't returned file checksum data")
 675     else:
 676       for file_name in file_list:
 677         node_is_mc = nodeinfo.master_candidate
 678         must_have_file = file_name not in master_files
 679         if file_name not in remote_cksum:
 680           if node_is_mc or must_have_file:
 681             bad = True
 682             feedback_fn("  - ERROR: file '%s' missing" % file_name)
 683         elif remote_cksum[file_name] != local_cksum[file_name]:
 684           if node_is_mc or must_have_file:
 685             bad = True
 686             feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
 687           else:
 688             # not candidate and this is not a must-have file
 689             bad = True
 690             feedback_fn("  - ERROR: non master-candidate has old/wrong file"
 691                         " '%s'" % file_name)
 692         else:
 693           # all good, except non-master/non-must have combination
 694           if not node_is_mc and not must_have_file:
 695             feedback_fn("  - ERROR: file '%s' should not exist on non master"
 696                         " candidates" % file_name)
 697
 698     # checks ssh to any
 699
 700     if constants.NV_NODELIST not in node_result:
 701       bad = True
 702       feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
 703     else:
 704       if node_result[constants.NV_NODELIST]:
 705         bad = True
 706         for node in node_result[constants.NV_NODELIST]:
 707           feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
 708                           (node, node_result[constants.NV_NODELIST][node]))
 709
 710     if constants.NV_NODENETTEST not in node_result:
 711       bad = True
 712       feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
 713     else:
 714       if node_result[constants.NV_NODENETTEST]:
 715         bad = True
 716         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
 717         for node in nlist:
 718           feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
 719                           (node, node_result[constants.NV_NODENETTEST][node]))
 720
 721     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
 722     if isinstance(hyp_result, dict):
 723       for hv_name, hv_result in hyp_result.iteritems():
 724         if hv_result is not None:
 725           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
 726                       (hv_name, hv_result))
 727     return bad
 728
 729   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
 730                       node_instance, feedback_fn, n_offline):
 731     """Verify an instance.
 732
 733     This function checks to see if the required block devices are
 734     available on the instance's node.
 735
 736     """
 737     bad = False
 738
 739     node_current = instanceconfig.primary_node
 740
 741     node_vol_should = {}
 742     instanceconfig.MapLVsByNode(node_vol_should)
 743
 744     for node in node_vol_should:
 745       if node in n_offline:
 746         # ignore missing volumes on offline nodes
 747         continue
 748       for volume in node_vol_should[node]:
 749         if node not in node_vol_is or volume not in node_vol_is[node]:
 750           feedback_fn("  - ERROR: volume %s missing on node %s" %
 751                           (volume, node))
 752           bad = True
 753
 754     if not instanceconfig.status == 'down':
 755       if ((node_current not in node_instance or
 756           not instance in node_instance[node_current]) and
 757           node_current not in n_offline):
 758         feedback_fn("  - ERROR: instance %s not running on node %s" %
 759                         (instance, node_current))
 760         bad = True
 761
 762     for node in node_instance:
 763       if (not node == node_current):
 764         if instance in node_instance[node]:
 765           feedback_fn("  - ERROR: instance %s should not run on node %s" %
 766                           (instance, node))
 767           bad = True
 768
 769     return bad
 770
 771   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
 772     """Verify if there are any unknown volumes in the cluster.
 773
 774     The .os, .swap and backup volumes are ignored. All other volumes are
 775     reported as unknown.
 776
 777     """
 778     bad = False
 779
 780     for node in node_vol_is:
 781       for volume in node_vol_is[node]:
 782         if node not in node_vol_should or volume not in node_vol_should[node]:
 783           feedback_fn("  - ERROR: volume %s on node %s should not exist" %
 784                       (volume, node))
 785           bad = True
 786     return bad
 787
 788   def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
 789     """Verify the list of running instances.
 790
 791     This checks what instances are running but unknown to the cluster.
 792
 793     """
 794     bad = False
 795     for node in node_instance:
 796       for runninginstance in node_instance[node]:
 797         if runninginstance not in instancelist:
 798           feedback_fn("  - ERROR: instance %s on node %s should not exist" %
 799                           (runninginstance, node))
 800           bad = True
 801     return bad
 802
 803   def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
 804     """Verify N+1 Memory Resilience.
 805
 806     Check that if one single node dies we can still start all the instances it
 807     was primary for.
 808
 809     """
 810     bad = False
 811
 812     for node, nodeinfo in node_info.iteritems():
 813       # This code checks that every node which is now listed as secondary has
 814       # enough memory to host all instances it is supposed to should a single
 815       # other node in the cluster fail.
 816       # FIXME: not ready for failover to an arbitrary node
 817       # FIXME: does not support file-backed instances
 818       # WARNING: we currently take into account down instances as well as up
 819       # ones, considering that even if they're down someone might want to start
 820       # them even in the event of a node failure.
 821       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
 822         needed_mem = 0
 823         for instance in instances:
 824           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
 825           if bep[constants.BE_AUTO_BALANCE]:
 826             needed_mem += bep[constants.BE_MEMORY]
 827         if nodeinfo['mfree'] < needed_mem:
 828           feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
 829                       " failovers should node %s fail" % (node, prinode))
 830           bad = True
 831     return bad
 832
 833   def CheckPrereq(self):
 834     """Check prerequisites.
 835
 836     Transform the list of checks we're going to skip into a set and check that
 837     all its members are valid.
 838
 839     """
 840     self.skip_set = frozenset(self.op.skip_checks)
 841     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
 842       raise errors.OpPrereqError("Invalid checks to be skipped specified")
 843
 844   def BuildHooksEnv(self):
 845     """Build hooks env.
 846
 847     Cluster-Verify hooks just rone in the post phase and their failure makes
 848     the output be logged in the verify output and the verification to fail.
 849
 850     """
 851     all_nodes = self.cfg.GetNodeList()
 852     # TODO: populate the environment with useful information for verify hooks
 853     env = {}
 854     return env, [], all_nodes
 855
 856   def Exec(self, feedback_fn):
 857     """Verify integrity of cluster, performing various test on nodes.
 858
 859     """
 860     bad = False
 861     feedback_fn("* Verifying global settings")
 862     for msg in self.cfg.VerifyConfig():
 863       feedback_fn("  - ERROR: %s" % msg)
 864
 865     vg_name = self.cfg.GetVGName()
 866     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
 867     nodelist = utils.NiceSort(self.cfg.GetNodeList())
 868     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
 869     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
 870     i_non_redundant = [] # Non redundant instances
 871     i_non_a_balanced = [] # Non auto-balanced instances
 872     n_offline = [] # List of offline nodes
 873     node_volume = {}
 874     node_instance = {}
 875     node_info = {}
 876     instance_cfg = {}
 877
 878     # FIXME: verify OS list
 879     # do local checksums
 880     master_files = [constants.CLUSTER_CONF_FILE]
 881
 882     file_names = ssconf.SimpleStore().GetFileList()
 883     file_names.append(constants.SSL_CERT_FILE)
 884     file_names.append(constants.RAPI_CERT_FILE)
 885     file_names.extend(master_files)
 886
 887     local_checksums = utils.FingerprintFiles(file_names)
 888
 889     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
 890     node_verify_param = {
 891       constants.NV_FILELIST: file_names,
 892       constants.NV_NODELIST: [node.name for node in nodeinfo
 893                               if not node.offline],
 894       constants.NV_HYPERVISOR: hypervisors,
 895       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
 896                                   node.secondary_ip) for node in nodeinfo
 897                                  if not node.offline],
 898       constants.NV_LVLIST: vg_name,
 899       constants.NV_INSTANCELIST: hypervisors,
 900       constants.NV_VGLIST: None,
 901       constants.NV_VERSION: None,
 902       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
 903       }
 904     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
 905                                            self.cfg.GetClusterName())
 906
 907     cluster = self.cfg.GetClusterInfo()
 908     master_node = self.cfg.GetMasterNode()
 909     for node_i in nodeinfo:
 910       node = node_i.name
 911       nresult = all_nvinfo[node].data
 912
 913       if node_i.offline:
 914         feedback_fn("* Skipping offline node %s" % (node,))
 915         n_offline.append(node)
 916         continue
 917
 918       if node == master_node:
 919         ntype = "master"
 920       elif node_i.master_candidate:
 921         ntype = "master candidate"
 922       else:
 923         ntype = "regular"
 924       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
 925
 926       if all_nvinfo[node].failed or not isinstance(nresult, dict):
 927         feedback_fn("  - ERROR: connection to %s failed" % (node,))
 928         bad = True
 929         continue
 930
 931       result = self._VerifyNode(node_i, file_names, local_checksums,
 932                                 nresult, feedback_fn, master_files)
 933       bad = bad or result
 934
 935       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
 936       if isinstance(lvdata, basestring):
 937         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
 938                     (node, lvdata.encode('string_escape')))
 939         bad = True
 940         node_volume[node] = {}
 941       elif not isinstance(lvdata, dict):
 942         feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
 943         bad = True
 944         continue
 945       else:
 946         node_volume[node] = lvdata
 947
 948       # node_instance
 949       idata = nresult.get(constants.NV_INSTANCELIST, None)
 950       if not isinstance(idata, list):
 951         feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
 952                     (node,))
 953         bad = True
 954         continue
 955
 956       node_instance[node] = idata
 957
 958       # node_info
 959       nodeinfo = nresult.get(constants.NV_HVINFO, None)
 960       if not isinstance(nodeinfo, dict):
 961         feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
 962         bad = True
 963         continue
 964
 965       try:
 966         node_info[node] = {
 967           "mfree": int(nodeinfo['memory_free']),
 968           "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
 969           "pinst": [],
 970           "sinst": [],
 971           # dictionary holding all instances this node is secondary for,
 972           # grouped by their primary node. Each key is a cluster node, and each
 973           # value is a list of instances which have the key as primary and the
 974           # current node as secondary.  this is handy to calculate N+1 memory
 975           # availability if you can only failover from a primary to its
 976           # secondary.
 977           "sinst-by-pnode": {},
 978         }
 979       except ValueError:
 980         feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
 981         bad = True
 982         continue
 983
 984     node_vol_should = {}
 985
 986     for instance in instancelist:
 987       feedback_fn("* Verifying instance %s" % instance)
 988       inst_config = self.cfg.GetInstanceInfo(instance)
 989       result =  self._VerifyInstance(instance, inst_config, node_volume,
 990                                      node_instance, feedback_fn, n_offline)
 991       bad = bad or result
 992       inst_nodes_offline = []
 993
 994       inst_config.MapLVsByNode(node_vol_should)
 995
 996       instance_cfg[instance] = inst_config
 997
 998       pnode = inst_config.primary_node
 999       if pnode in node_info:
1000         node_info[pnode]['pinst'].append(instance)
1001       elif pnode not in n_offline:
1002         feedback_fn("  - ERROR: instance %s, connection to primary node"
1003                     " %s failed" % (instance, pnode))
1004         bad = True
1005
1006       if pnode in n_offline:
1007         inst_nodes_offline.append(pnode)
1008
1009       # If the instance is non-redundant we cannot survive losing its primary
1010       # node, so we are not N+1 compliant. On the other hand we have no disk
1011       # templates with more than one secondary so that situation is not well
1012       # supported either.
1013       # FIXME: does not support file-backed instances
1014       if len(inst_config.secondary_nodes) == 0:
1015         i_non_redundant.append(instance)
1016       elif len(inst_config.secondary_nodes) > 1:
1017         feedback_fn("  - WARNING: multiple secondaries for instance %s"
1018                     % instance)
1019
1020       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1021         i_non_a_balanced.append(instance)
1022
1023       for snode in inst_config.secondary_nodes:
1024         if snode in node_info:
1025           node_info[snode]['sinst'].append(instance)
1026           if pnode not in node_info[snode]['sinst-by-pnode']:
1027             node_info[snode]['sinst-by-pnode'][pnode] = []
1028           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1029         elif snode not in n_offline:
1030           feedback_fn("  - ERROR: instance %s, connection to secondary node"
1031                       " %s failed" % (instance, snode))
1032           bad = True
1033         if snode in n_offline:
1034           inst_nodes_offline.append(snode)
1035
1036       if inst_nodes_offline:
1037         # warn that the instance lives on offline nodes, and set bad=True
1038         feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1039                     ", ".join(inst_nodes_offline))
1040         bad = True
1041
1042     feedback_fn("* Verifying orphan volumes")
1043     result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1044                                        feedback_fn)
1045     bad = bad or result
1046
1047     feedback_fn("* Verifying remaining instances")
1048     result = self._VerifyOrphanInstances(instancelist, node_instance,
1049                                          feedback_fn)
1050     bad = bad or result
1051
1052     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1053       feedback_fn("* Verifying N+1 Memory redundancy")
1054       result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1055       bad = bad or result
1056
1057     feedback_fn("* Other Notes")
1058     if i_non_redundant:
1059       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1060                   % len(i_non_redundant))
1061
1062     if i_non_a_balanced:
1063       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1064                   % len(i_non_a_balanced))
1065
1066     if n_offline:
1067       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1068
1069     return not bad
1070
1071   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1072     """Analize the post-hooks' result
1073
1074     This method analyses the hook result, handles it, and sends some
1075     nicely-formatted feedback back to the user.
1076
1077     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1078         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1079     @param hooks_results: the results of the multi-node hooks rpc call
1080     @param feedback_fn: function used send feedback back to the caller
1081     @param lu_result: previous Exec result
1082     @return: the new Exec result, based on the previous result
1083         and hook results
1084
1085     """
1086     # We only really run POST phase hooks, and are only interested in
1087     # their results
1088     if phase == constants.HOOKS_PHASE_POST:
1089       # Used to change hooks' output to proper indentation
1090       indent_re = re.compile('^', re.M)
1091       feedback_fn("* Hooks Results")
1092       if not hooks_results:
1093         feedback_fn("  - ERROR: general communication failure")
1094         lu_result = 1
1095       else:
1096         for node_name in hooks_results:
1097           show_node_header = True
1098           res = hooks_results[node_name]
1099           if res.failed or res.data is False or not isinstance(res.data, list):
1100             if res.offline:
1101               # no need to warn or set fail return value
1102               continue
1103             feedback_fn("    Communication failure in hooks execution")
1104             lu_result = 1
1105             continue
1106           for script, hkr, output in res.data:
1107             if hkr == constants.HKR_FAIL:
1108               # The node header is only shown once, if there are
1109               # failing hooks on that node
1110               if show_node_header:
1111                 feedback_fn("  Node %s:" % node_name)
1112                 show_node_header = False
1113               feedback_fn("    ERROR: Script %s failed, output:" % script)
1114               output = indent_re.sub('      ', output)
1115               feedback_fn("%s" % output)
1116               lu_result = 1
1117
1118       return lu_result
1119
1120
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked as up and use a network mirrored disk
    template are examined.

    @rtype: tuple
    @return: a tuple of four elements:
        - list of nodes which returned invalid volume data
        - dictionary of node name to LVM error message
        - list of instance names having at least one volume not online
        - dictionary of instance name to the list of its (node, volume)
          pairs which no node reported

    """
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      if (inst.status != "up" or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      lvs = node_lvs[node]
      if lvs.failed:
        if not lvs.offline:
          self.LogWarning("Connection to node %s failed: %s" %
                          (node, lvs.data))
        continue
      lvs = lvs.data
      if isinstance(lvs, basestring):
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
        res_nlvm[node] = lvs
        # an error message carries no volume data, and iterating over it
        # below would fail, so go on with the next node
        continue
      elif not isinstance(lvs, dict):
        logging.warning("Connection to node %s failed or invalid data"
                        " returned", node)
        res_nodes.append(node)
        continue

      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
        # every volume found is removed from nv_dict, so that what is left
        # at the end are the volumes which are missing
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
1204
1205
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (current and new cluster name) and two node
        lists, each holding only the master node

    """
    hook_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master = self.cfg.GetMasterNode()
    return hook_env, [master], [master]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, the resulting IP address is remembered in
    C{self.ip} and the opcode name is replaced by the resolved name.

    @raise errors.OpPrereqError: if neither the name nor the IP address
        change, or if the new IP address already answers on the network

    """
    resolved = utils.HostInfo(self.op.name)
    new_name, new_ip = resolved.name, resolved.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    # a new address which already answers is in use by something else
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped while the configuration is changed and the
    known hosts file is redistributed; restarting it is always attempted
    afterwards.

    @raise errors.OpExecError: if the master role cannot be disabled

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_res = self.rpc.call_node_stop_master(master, False)
    if stop_res.failed or not stop_res.data:
      raise errors.OpExecError("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster)

      # update the known hosts file
      known_hosts = constants.SSH_KNOWN_HOSTS_FILE
      ssh.WriteKnownHostsFile(self.cfg, known_hosts)
      # the file was written locally, so the master is left out of the upload
      other_nodes = self.cfg.GetNodeList()
      if master in other_nodes:
        other_nodes.remove(master)
      upload_res = self.rpc.call_upload_file(other_nodes, known_hosts)
      for to_node, to_result in upload_res.iteritems():
        if to_result.failed or not to_result.data:
          logging.error("Copy of file %s to node %s failed",
                        constants.SSH_KNOWN_HOSTS_FILE, to_node)

    finally:
      start_res = self.rpc.call_node_start_master(master, False)
      if start_res.failed or not start_res.data:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually.")
1284
1285
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  # the children are examined first, the disk itself last
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1300
1301
1302 class LUSetClusterParams(LogicalUnit):
1303   """Change the parameters of the cluster.
1304
1305   """
1306   HPATH = "cluster-modify"
1307   HTYPE = constants.HTYPE_CLUSTER
1308   _OP_REQP = []
1309   REQ_BGL = False
1310
1311   def CheckParameters(self):
1312     """Check parameters
1313
1314     """
1315     if not hasattr(self.op, "candidate_pool_size"):
1316       self.op.candidate_pool_size = None
1317     if self.op.candidate_pool_size is not None:
1318       try:
1319         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1320       except ValueError, err:
1321         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1322                                    str(err))
1323       if self.op.candidate_pool_size < 1:
1324         raise errors.OpPrereqError("At least one master candidate needed")
1325
1326   def ExpandNames(self):
1327     # FIXME: in the future maybe other cluster params won't require checking on
1328     # all nodes to be modified.
1329     self.needed_locks = {
1330       locking.LEVEL_NODE: locking.ALL_SET,
1331     }
1332     self.share_locks[locking.LEVEL_NODE] = 1
1333
1334   def BuildHooksEnv(self):
1335     """Build hooks env.
1336
1337     """
1338     env = {
1339       "OP_TARGET": self.cfg.GetClusterName(),
1340       "NEW_VG_NAME": self.op.vg_name,
1341       }
1342     mn = self.cfg.GetMasterNode()
1343     return env, [mn], [mn]
1344
1345   def CheckPrereq(self):
1346     """Check prerequisites.
1347
1348     This checks whether the given params don't conflict and
1349     if the given volume group is valid.
1350
1351     """
1352     # FIXME: This only works because there is only one parameter that can be
1353     # changed or removed.
1354     if self.op.vg_name is not None and not self.op.vg_name:
1355       instances = self.cfg.GetAllInstancesInfo().values()
1356       for inst in instances:
1357         for disk in inst.disks:
1358           if _RecursiveCheckIfLVMBased(disk):
1359             raise errors.OpPrereqError("Cannot disable lvm storage while"
1360                                        " lvm-based instances exist")
1361
1362     node_list = self.acquired_locks[locking.LEVEL_NODE]
1363
1364     # if vg_name not None, checks given volume group on all nodes
1365     if self.op.vg_name:
1366       vglist = self.rpc.call_vg_list(node_list)
1367       for node in node_list:
1368         if vglist[node].failed:
1369           # ignoring down node
1370           self.LogWarning("Node %s unreachable/error, ignoring" % node)
1371           continue
1372         vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
1373                                               self.op.vg_name,
1374                                               constants.MIN_VG_SIZE)
1375         if vgstatus:
1376           raise errors.OpPrereqError("Error on node '%s': %s" %
1377                                      (node, vgstatus))
1378
1379     self.cluster = cluster = self.cfg.GetClusterInfo()
1380     # validate beparams changes
1381     if self.op.beparams:
1382       utils.CheckBEParams(self.op.beparams)
1383       self.new_beparams = cluster.FillDict(
1384         cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
1385
1386     # hypervisor list/parameters
1387     self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
1388     if self.op.hvparams:
1389       if not isinstance(self.op.hvparams, dict):
1390         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1391       for hv_name, hv_dict in self.op.hvparams.items():
1392         if hv_name not in self.new_hvparams:
1393           self.new_hvparams[hv_name] = hv_dict
1394         else:
1395           self.new_hvparams[hv_name].update(hv_dict)
1396
1397     if self.op.enabled_hypervisors is not None:
1398       self.hv_list = self.op.enabled_hypervisors
1399     else:
1400       self.hv_list = cluster.enabled_hypervisors
1401
1402     if self.op.hvparams or self.op.enabled_hypervisors is not None:
1403       # either the enabled list has changed, or the parameters have, validate
1404       for hv_name, hv_params in self.new_hvparams.items():
1405         if ((self.op.hvparams and hv_name in self.op.hvparams) or
1406             (self.op.enabled_hypervisors and
1407              hv_name in self.op.enabled_hypervisors)):
1408           # either this is a new hypervisor, or its parameters have changed
1409           hv_class = hypervisor.GetHypervisor(hv_name)
1410           hv_class.CheckParameterSyntax(hv_params)
1411           _CheckHVParams(self, node_list, hv_name, hv_params)
1412
1413   def Exec(self, feedback_fn):
1414     """Change the parameters of the cluster.
1415
1416     """
1417     if self.op.vg_name is not None:
1418       if self.op.vg_name != self.cfg.GetVGName():
1419         self.cfg.SetVGName(self.op.vg_name)
1420       else:
1421         feedback_fn("Cluster LVM configuration already in desired"
1422                     " state, not changing")
1423     if self.op.hvparams:
1424       self.cluster.hvparams = self.new_hvparams
1425     if self.op.enabled_hypervisors is not None:
1426       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1427     if self.op.beparams:
1428       self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
1429     if self.op.candidate_pool_size is not None:
1430       self.cluster.candidate_pool_size = self.op.candidate_pool_size
1431
1432     self.cfg.Update(self.cluster)
1433
1434     # we want to update nodes after the cluster so that if any errors
1435     # happen, we have recorded and saved the cluster info
1436     if self.op.candidate_pool_size is not None:
1437       _AdjustCandidatePool(self)
1438
1439
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, with the share flag set.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    This is done by passing the current, unmodified cluster object to the
    configuration's Update method.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster)
1465
1466
1467 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1468   """Sleep and poll for an instance's disk to sync.
1469
1470   """
1471   if not instance.disks:
1472     return True
1473
1474   if not oneshot:
1475     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1476
1477   node = instance.primary_node
1478
1479   for dev in instance.disks:
1480     lu.cfg.SetDiskID(dev, node)
1481
1482   retries = 0
1483   while True:
1484     max_time = 0
1485     done = True
1486     cumul_degraded = False
1487     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1488     if rstats.failed or not rstats.data:
1489       lu.LogWarning("Can't get any data from node %s", node)
1490       retries += 1
1491       if retries >= 10:
1492         raise errors.RemoteError("Can't contact node %s for mirror data,"
1493                                  " aborting." % node)
1494       time.sleep(6)
1495       continue
1496     rstats = rstats.data
1497     retries = 0
1498     for i in range(len(rstats)):
1499       mstat = rstats[i]
1500       if mstat is None:
1501         lu.LogWarning("Can't compute data for node %s/%s",
1502                            node, instance.disks[i].iv_name)
1503         continue
1504       # we ignore the ldisk parameter
1505       perc_done, est_time, is_degraded, _ = mstat
1506       cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1507       if perc_done is not None:
1508         done = False
1509         if est_time is not None:
1510           rem_time = "%d estimated seconds remaining" % est_time
1511           max_time = est_time
1512         else:
1513           rem_time = "no time estimate"
1514         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1515                         (instance.disks[i].iv_name, perc_done, rem_time))
1516     if done or oneshot:
1517       break
1518
1519     time.sleep(min(60, max_time))
1520
1521   if done:
1522     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1523   return not cumul_degraded
1524
1525
1526 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1527   """Check that mirrors are not degraded.
1528
1529   The ldisk parameter, if True, will change the test from the
1530   is_degraded attribute (which represents overall non-ok status for
1531   the device(s)) to the ldisk (representing the local storage status).
1532
1533   """
1534   lu.cfg.SetDiskID(dev, node)
1535   if ldisk:
1536     idx = 6
1537   else:
1538     idx = 5
1539
1540   result = True
1541   if on_primary or dev.AssembleOnSecondary():
1542     rstats = lu.rpc.call_blockdev_find(node, dev)
1543     if rstats.failed or not rstats.data:
1544       logging.warning("Node %s: disk degraded, not found or node down", node)
1545       result = False
1546     else:
1547       result = result and (not rstats.data[idx])
1548   if dev.children:
1549     for child in dev.children:
1550       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1551
1552   return result
1553
1554
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    """Validate the query and request all node locks.

    @raise errors.OpPrereqError: if specific OS names were requested

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    Nodes whose result is failed or empty contribute no OS.

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @returns: a dictionary with osnames as keys and as value another map, with
        nodes as keys and list of OS objects as values, eg::

          {"debian-etch": {"node1": [<object>,...],
                           "node2": [<object>,]}
          }

    """
    by_os = {}
    for node_name, node_res in rlist.iteritems():
      if node_res.failed or not node_res.data:
        continue
      for os_obj in node_res.data:
        if os_obj.name not in by_os:
          # the first time an OS is seen it gets an empty list for each
          # node in node_list
          by_os[os_obj.name] = dict([(nname, []) for nname in node_list])
        by_os[os_obj.name][node_name].append(os_obj)
    return by_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the requested
        output fields, in the requested order
    @raise errors.OpExecError: if the diagnose RPC call returned False

    """
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
                   if node in locked_nodes]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")
    per_os = self._DiagnoseByOS(valid_nodes, node_data)

    output = []
    for os_name, os_data in per_os.iteritems():
      row = []
      for field in self.op.output_fields:
        if field == "node_status":
          val = dict([(node_name, [(v.status, v.path) for v in nos_list])
                      for node_name, nos_list in os_data.iteritems()])
        elif field == "valid":
          # every node must have the OS, with a true first entry
          val = utils.all([osl and osl[0] for osl in os_data.values()])
        elif field == "name":
          val = os_name
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
1641
1642
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @rtype: tuple
    @return: the environment (holding the node name) and, twice, the list
        of all nodes except the one being removed

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    On success the opcode node name is replaced by the name found in the
    configuration and the node object is saved in C{self.node}.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name == instance.primary_node:
        raise errors.OpPrereqError("Instance %s still running on the node,"
                                   " please remove first." % instance_name)
      if node.name in instance.secondary_nodes:
        raise errors.OpPrereqError("Instance %s has node as a secondary,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is removed via the context first, and only then told to
    leave the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    self.rpc.call_node_leave_cluster(node.name)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)
1713
1714
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  Fields listed in C{_FIELDS_STATIC} are answered from the
  configuration alone; as soon as any other field is requested the
  nodes are locked (in shared mode) and queried via RPC.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "name", "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "serial_no",
    "master_candidate",
    "master",
    "offline",
    )

  def ExpandNames(self):
    """Validate the selected fields and declare the needed node locks.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    # anything that is not a static field forces us to lock the nodes
    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    if self.do_locking:
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to do: a non-empty name list has already been validated by
    _GetWantedNodes, and an empty one needs no validation.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: feedback callback (unused here)
    @return: one row per node (nodes ordered by C{utils.NiceSort}),
        each row holding the values of C{self.op.output_fields}
    @raise errors.OpExecError: if, without locking, a wanted node is
        no longer in the configuration
    @raise errors.ParameterError: if an unhandled field is requested

    """
    cfg_nodes = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      names = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      names = cfg_nodes.keys()
    else:
      names = self.wanted
      gone = set(names).difference(cfg_nodes.keys())
      if gone:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % gone)

    names = utils.NiceSort(names)
    nodelist = [cfg_nodes[name] for name in names]

    # begin data gathering

    # (output field, key in the node info payload) for integer values
    int_fields = (
      ("mtotal", 'memory_total'),
      ("mnode", 'memory_dom0'),
      ("mfree", 'memory_free'),
      ("dtotal", 'vg_size'),
      ("dfree", 'vg_free'),
      ("ctotal", 'cpu_total'),
      )

    if self.do_locking:
      live_data = {}
      rpc_result = self.rpc.call_node_info(names, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
      for name in names:
        answer = rpc_result[name]
        if answer.failed or not answer.data:
          live_data[name] = {}
          continue
        payload = answer.data
        entry = dict([(field, utils.TryConvert(int, payload.get(key, None)))
                      for field, key in int_fields])
        entry["bootid"] = payload.get('bootid', None)
        live_data[name] = entry
    else:
      live_data = dict.fromkeys(names, {})

    node_to_primary = dict([(name, set()) for name in names])
    node_to_secondary = dict([(name, set()) for name in names])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # only walk the instances when an instance-related field is wanted
      for instance_name in self.cfg.GetInstanceList():
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    # value extractors for the fields answered from the configuration
    static_values = {
      "name": lambda node: node.name,
      "pinst_list": lambda node: list(node_to_primary[node.name]),
      "sinst_list": lambda node: list(node_to_secondary[node.name]),
      "pinst_cnt": lambda node: len(node_to_primary[node.name]),
      "sinst_cnt": lambda node: len(node_to_secondary[node.name]),
      "pip": lambda node: node.primary_ip,
      "sip": lambda node: node.secondary_ip,
      "tags": lambda node: list(node.GetTags()),
      "serial_no": lambda node: node.serial_no,
      "master_candidate": lambda node: node.master_candidate,
      "master": lambda node: node.name == master_node,
      "offline": lambda node: node.offline,
      }

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in static_values:
          row.append(static_values[field](node))
        elif self._FIELDS_DYNAMIC.Matches(field):
          row.append(live_data[node.name].get(field, None))
        else:
          raise errors.ParameterError(field)
      output.append(row)

    return output
1865
1866
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the selected fields and declare shared node locks.

    All nodes are locked when no node names were given.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    @param feedback_fn: feedback callback (unused here)
    @return: one row per volume reported by each node; every value in
        a row is converted to a string
    @raise errors.ParameterError: if an unhandled field is requested

    """
    node_names = self.nodes
    rpc_result = self.rpc.call_node_volumes(node_names)

    instances = [self.cfg.GetInstanceInfo(iname) for iname
                 in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in instances])

    output = []
    for node in node_names:
      # nodes without a usable answer contribute no rows
      if (node not in rpc_result or rpc_result[node].failed or
          not rpc_result[node].data):
        continue

      for vol in sorted(rpc_result[node].data, key=lambda item: item['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # first instance having this volume on this node, else '-'
            val = '-'
            for inst in instances:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
1946
1947
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: tuple of (environment dict, pre-phase node list,
        post-phase node list)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    On success the node object to be added is stored in
    C{self.new_node}.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # without an explicit secondary IP the node is treated as single-homed
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # check reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    # the new node becomes a master candidate only while the pool is
    # below its configured size
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    mc_now, _ = self.cfg.GetMasterCandidateStats()
    master_candidate = mc_now < cp_size

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=master_candidate,
                                 offline=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    @param feedback_fn: callback used to report the individual
        ssh/hostname verification failures
    @raise errors.OpExecError: on protocol version mismatch, failed key
        transfer, wrong secondary IP or failed verification

    """
    new_node = self.new_node
    node = new_node.name

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise()
    if result.data:
      if constants.PROTOCOL_VERSION == result.data:
        logging.info("Communication to node %s fine, sw version %s match",
                     node, result.data)
      else:
        raise errors.OpExecError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.data))
    else:
      raise errors.OpExecError("Cannot get version from the new node")

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for i in keyfiles:
      f = open(i, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                    keyarray[2],
                                    keyarray[3], keyarray[4], keyarray[5])

    if result.failed or not result.data:
      raise errors.OpExecError("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      if result.failed or not result.data:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      'nodelist': [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      if result[verifier].failed or not result[verifier].data:
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                 " for remote verification" % verifier)
      # the payload lives in the .data attribute of the RPC result; the
      # result object itself must not be indexed
      failures = result[verifier].data['nodelist']
      if failures:
        for failed in failures:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, failures[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
    dist_nodes = self.cfg.GetNodeList()
    if not self.op.readd:
      dist_nodes.append(node)
    if myself.name in dist_nodes:
      dist_nodes.remove(myself.name)

    logging.debug("Copying hosts and known_hosts to all nodes")
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
      result = self.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.iteritems():
        if to_result.failed or not to_result.data:
          logging.error("Copy of file %s to node %s failed", fname, to_node)

    to_copy = []
    if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
      to_copy.append(constants.VNC_PASSWORD_FILE)
    for fname in to_copy:
      result = self.rpc.call_upload_file([node], fname)
      # same success test as for the other uploads: check the payload
      if result[node].failed or not result[node].data:
        logging.error("Could not copy file %s to node %s", fname, node)

    if self.op.readd:
      self.context.ReaddNode(new_node)
    else:
      self.context.AddNode(new_node)
2155
2156
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The C{master_candidate} and C{offline} opcode fields are tri-state:
  None means "leave unchanged".

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested changes.

    """
    expanded_name = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded_name

    for flag_name in ('master_candidate', 'offline'):
      _CheckBooleanOpField(self.op, flag_name)

    if self.op.master_candidate is None and self.op.offline is None:
      raise errors.OpPrereqError("Please pass at least one modification")
    if self.op.offline == True and self.op.master_candidate == True:
      raise errors.OpPrereqError("Can't set the node into offline and"
                                 " master_candidate at the same time")

  def ExpandNames(self):
    """Declare the lock on the node to be modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    hook_nodes = [self.cfg.GetMasterNode(), self.op.node_name]
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      }
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that a demotion does not hit the master node or shrink
    the candidate pool below its configured size (the latter only
    warns when C{force} is set), and that an offline node is not
    promoted to master candidate.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    demotion_requested = (self.op.master_candidate == False or
                          self.op.offline == True)
    if demotion_requested and node.master_candidate:
      # we will demote the node from master_candidate
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master node has to be a"
                                   " master candidate and online")
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if not self.op.force:
          raise errors.OpPrereqError(msg)
        self.LogWarning(msg)

    if (self.op.master_candidate == True and node.offline and
        self.op.offline != False):
      raise errors.OpPrereqError("Can't set an offline node to"
                                 " master_candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: feedback callback (unused here)
    @return: a list of (parameter name, new value description) pairs

    """
    node = self.node
    new_offline = self.op.offline
    new_mc = self.op.master_candidate

    changes = []

    if new_offline is not None:
      node.offline = new_offline
      changes.append(("offline", str(new_offline)))
      if new_offline == True and node.master_candidate:
        node.master_candidate = False
        changes.append(("master_candidate", "auto-demotion due to offline"))

    if new_mc is not None:
      node.master_candidate = new_mc
      changes.append(("master_candidate", str(new_mc)))
      if new_mc == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        # a well-formed answer is a two-element sequence
        malformed = (rrc.failed or not isinstance(rrc.data, (tuple, list))
                     or len(rrc.data) != 2)
        if malformed:
          self.LogWarning("Node rpc error: %s" % rrc.error)
        elif not rrc.data[0]:
          self.LogWarning("Node failed to demote itself: %s" % rrc.data[1])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if self.op.node_name != self.cfg.GetMasterNode():
      self.context.ReaddNode(node)

    return changes
2261
2262
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This LU declares no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @param feedback_fn: feedback callback (unused here)
    @return: a dict with version constants, the architecture of the
        machine running this code and settings read from the cluster
        configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    architecture = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "architecture": architecture,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.default_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": cluster.hvparams,
      "beparams": cluster.beparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      }
2301
2302
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")

  def ExpandNames(self):
    """Declare no locks and validate the selected fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    @param feedback_fn: feedback callback (unused here)
    @return: a list with one value per entry of
        C{self.op.output_fields}, in the same order
    @raise errors.ParameterError: if an unhandled field is requested

    """
    answers = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        answers.append(self.cfg.GetClusterName())
      elif field == "master_node":
        answers.append(self.cfg.GetMasterNode())
      elif field == "drain_flag":
        # the flag is simply the presence of the drain file
        answers.append(os.path.exists(constants.JOB_QUEUE_DRAIN_FILE))
      else:
        raise errors.ParameterError(field)
    return answers
2341
2342
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes C{_CheckNodeOnline}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @param feedback_fn: feedback callback (unused here)
    @return: the device information computed by _AssembleInstanceDisks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    assembled_ok, device_info = _AssembleInstanceDisks(self, self.instance)
    if assembled_ok:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
2379
2380
2381 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2382   """Prepare the block devices for an instance.
2383
2384   This sets up the block devices on all nodes.
2385
2386   @type lu: L{LogicalUnit}
2387   @param lu: the logical unit on whose behalf we execute
2388   @type instance: L{objects.Instance}
2389   @param instance: the instance for whose disks we assemble
2390   @type ignore_secondaries: boolean
2391   @param ignore_secondaries: if true, errors on secondary nodes
2392       won't result in an error return from the function
2393   @return: False if the operation failed, otherwise a list of
2394       (host, instance_visible_name, node_visible_name)
2395       with the mapping from node devices to instance devices
2396
2397   """
2398   device_info = []
2399   disks_ok = True
2400   iname = instance.name
2401   # With the two passes mechanism we try to reduce the window of
2402   # opportunity for the race condition of switching DRBD to primary
2403   # before handshaking occured, but we do not eliminate it
2404
2405   # The proper fix would be to wait (with some limits) until the
2406   # connection has been made and drbd transitions from WFConnection
2407   # into any other network-connected state (Connected, SyncTarget,
2408   # SyncSource, etc.)
2409
2410   # 1st pass, assemble on all nodes in secondary mode
2411   for inst_disk in instance.disks:
2412     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2413       lu.cfg.SetDiskID(node_disk, node)
2414       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2415       if result.failed or not result:
2416         lu.proc.LogWarning("Could not prepare block device %s on node %s"
2417                            " (is_primary=False, pass=1)",
2418                            inst_disk.iv_name, node)
2419         if not ignore_secondaries:
2420           disks_ok = False
2421
2422   # FIXME: race condition on drbd migration to primary
2423
2424   # 2nd pass, do only the primary node
2425   for inst_disk in instance.disks:
2426     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2427       if node != instance.primary_node:
2428         continue
2429       lu.cfg.SetDiskID(node_disk, node)
2430       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2431       if result.failed or not result:
2432         lu.proc.LogWarning("Could not prepare block device %s on node %s"
2433                            " (is_primary=True, pass=2)",
2434                            inst_disk.iv_name, node)
2435         disks_ok = False
2436     device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
2437
2438   # leave the disks configured for the primary node
2439   # this is a workaround that would be fixed better by
2440   # improving the logical/physical id handling
2441   for disk in instance.disks:
2442     lu.cfg.SetDiskID(disk, instance.primary_node)
2443
2444   return disks_ok, device_info
2445
2446
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If assembling fails, the disks are shut down again and an error is
  raised; the '--force' hint is only logged when force was explicitly
  passed as a false (non-None) value.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed on as ignore_secondaries to
      _AssembleInstanceDisks
  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled_ok, _unused = _AssembleInstanceDisks(lu, instance,
                                                 ignore_secondaries=force)
  if assembled_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
2460
2461
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    The work is delegated to _SafeShutdownInstanceDisks.

    @param feedback_fn: feedback callback (unused here)

    """
    _SafeShutdownInstanceDisks(self, self.instance)
2494
2495
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  The primary node is first asked for its instance list; only if the
  instance is not in that list is _ShutdownInstanceDisks called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @raise errors.OpExecError: if the primary node gave no usable answer
      or if the instance is listed as running there

  """
  pnode = instance.primary_node
  answer = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]

  if answer.failed or not isinstance(answer.data, list):
    raise errors.OpExecError("Can't contact node '%s'" % pnode)

  if instance.name in answer.data:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
2515
2516
2517 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2518   """Shutdown block devices of an instance.
2519
2520   This does the shutdown on all nodes of the instance.
2521
2522   If the ignore_primary is false, errors on the primary node are
2523   ignored.
2524
2525   """
2526   result = True
2527   for disk in instance.disks:
2528     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2529       lu.cfg.SetDiskID(top_disk, node)
2530       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2531       if result.failed or not result.data:
2532         logging.error("Could not shutdown block device %s on node %s",
2533                       disk.iv_name, node)
2534         if not ignore_primary or node != instance.primary_node:
2535           result = False
2536   return result
2537
2538
2539 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2540   """Checks if a node has enough free memory.
2541
2542   This function check if a given node has the needed amount of free
2543   memory. In case the node has less memory or we cannot get the
2544   information from the node, this function raise an OpPrereqError
2545   exception.
2546
2547   @type lu: C{LogicalUnit}
2548   @param lu: a logical unit from which we get configuration data
2549   @type node: C{str}
2550   @param node: the node to check
2551   @type reason: C{str}
2552   @param reason: string to use in the error message
2553   @type requested: C{int}
2554   @param requested: the amount of memory in MiB to check for
2555   @type hypervisor_name: C{str}
2556   @param hypervisor_name: the hypervisor to ask for memory stats
2557   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2558       we cannot check the node
2559
2560   """
2561   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2562   nodeinfo[node].Raise()
2563   free_mem = nodeinfo[node].data.get('memory_free')
2564   if not isinstance(free_mem, int):
2565     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2566                              " was '%s'" % (node, free_mem))
2567   if requested > free_mem:
2568     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2569                              " needed %s MiB, available %s MiB" %
2570                              (node, reason, requested, free_mem))
2571
2572
class LUStartupInstance(LogicalUnit):
  """Logical unit which starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The hooks run on the master, primary and secondary nodes of the
    instance; the same node list is returned twice.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode(), self.instance.primary_node]
    node_list.extend(self.instance.secondary_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be present in the configuration. Its primary
    node is then passed through the online, bridge and free memory
    checks.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    be_params = self.cfg.GetClusterInfo().FillBE(instance)
    # make sure the bridges used by the instance exist
    _CheckInstanceBridgesExist(self, instance)

    _CheckNodeFreeMemory(self, pnode, "starting instance %s" % instance.name,
                         be_params[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration before anything is
    attempted on the node. If the start RPC fails, the disks are shut
    down again and the operation is aborted.

    """
    instance = self.instance
    force = self.op.force
    # extra_args is optional on the opcode
    extra_args = getattr(self.op, "extra_args", "")

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance, extra_args)
    if not start_result.failed and start_result.data:
      return

    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance")
2637
2638
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are delegated to the primary node through a
  single reboot RPC. A full reboot is driven from here: the instance
  is shut down, its disks are stopped and started again, and the
  instance is started.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the reboot type, then lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full reboot constants

    """
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, and passes its
    primary node through the online and bridge checks.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    @raise errors.OpExecError: if the reboot RPC fails (soft/hard), or
        if the shutdown, disk activation or start step fails (full)

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    # extra_args is optional on the opcode
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type, extra_args)
      if result.failed or not result.data:
        raise errors.OpExecError("Could not reboot instance")
    else:
      # The RPC layer hands back a result object; its status has to be
      # read from .failed/.data (as for every other RPC call in this
      # module) rather than from the truth value of the object itself.
      result = self.rpc.call_instance_shutdown(node_current, instance)
      if result.failed or not result.data:
        raise errors.OpExecError("could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, extra_args)
      if result.failed or not result.data:
        # do not leave the disks active for an instance which is not running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for full reboot")

    # only reached when the reboot succeeded
    self.cfg.MarkInstanceUp(instance.name)
2715
2716
class LUShutdownInstance(LogicalUnit):
  """Logical unit which stops an instance and shuts down its disks.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The hooks run on the master, primary and secondary nodes of the
    instance; the same node list is returned twice.

    """
    instance = self.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    node_list = [self.cfg.GetMasterNode(), instance.primary_node]
    node_list.extend(instance.secondary_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be present in the configuration; its primary
    node is passed through the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first. A failed
    shutdown RPC only results in a warning; the disks are shut down in
    any case.

    """
    instance = self.instance
    pnode = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)

    stop_result = self.rpc.call_instance_shutdown(pnode, instance)
    stopped = not stop_result.failed and stop_result.data
    if not stopped:
      self.proc.LogWarning("Could not shutdown instance")

    _ShutdownInstanceDisks(self, instance)
2763
2764
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  The OS create scripts are run again for an instance which is down,
  optionally after changing the OS of the instance (optional
  C{os_type} opcode attribute).

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If a new OS was requested, it is also verified on the primary node.

    @raise errors.OpPrereqError: if the instance is diskless, is marked
        up, is reported as running (or cannot be queried), or if the
        requested OS is not valid on the primary node

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    # a failed query is treated like a running instance
    if remote_info.failed or remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    # os_type is optional; normalise a missing attribute to None
    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # Report the node name which was actually looked up; this
        # opcode has no "pnode" attribute to take it from.
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise()
      if not isinstance(result.data, objects.OS):
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node" % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    @raise errors.OpExecError: if the OS create scripts fail

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s" %
                                 (inst.name, inst.primary_node))
    finally:
      # the disks were only activated for the installation
      _ShutdownInstanceDisks(self, inst)
2851
2852
class LURenameInstance(LogicalUnit):
  """Logical unit which renames an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The hooks run on the master, primary and secondary nodes of the
    instance; the same node list is returned twice. The new name is
    exported as INSTANCE_NEW_NAME.

    """
    instance = self.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode(), instance.primary_node]
    node_list.extend(instance.secondary_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known, marked down and not running on its
    primary node. The new name is resolved and must not be used by
    another instance; unless the optional C{ignore_ip} opcode
    attribute is set, its IP must not answer on the node daemon port.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name, pnode))
    self.instance = instance

    # verification of the new name
    name_info = utils.HostInfo(self.op.new_name)

    # from here on the opcode carries the resolved name
    new_name = name_info.name
    self.op.new_name = new_name
    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    check_ip = not getattr(self.op, "ignore_ip", False)
    if check_ip and utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are renamed first; for
    file-based instances the storage directory is then renamed on the
    primary node, and finally the OS rename script is run with the
    disks activated. A failing OS rename script only gives a warning.

    """
    inst = self.instance
    old_name = inst.name
    new_name = self.op.new_name

    # remember the old directory before the configuration changes
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Switch the instance lock to the new name; safe while holding the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, new_name)

    # the renamed instance has to be read again from the configuration
    inst = self.cfg.GetInstanceInfo(new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_result = self.rpc.call_file_storage_dir_rename(
        inst.primary_node, old_file_storage_dir, new_file_storage_dir)
      rename_result.Raise()
      if not rename_result.data:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" %
                                 (inst.primary_node, old_file_storage_dir,
                                  new_file_storage_dir))

      if not rename_result.data[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" %
                                 (old_file_storage_dir, new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name)
      if script_result.failed or not script_result.data:
        self.proc.LogWarning(
          "Could not run OS rename script for instance %s on node %s"
          " (but the instance has been renamed in Ganeti)" %
          (inst.name, inst.primary_node))
    finally:
      # the disks were only needed for the rename script
      _ShutdownInstanceDisks(self, inst)
2962
2963
class LURemoveInstance(LogicalUnit):
  """Logical unit which removes an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node lock list starts out empty and is filled in by
    DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list contains only the master node and is returned twice.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance object is read from the configuration and stored on
    the LU; it must exist, since it has been locked.

    """
    found = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = found
    assert found is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and finally it is
    dropped from the configuration. With C{ignore_failures} set, a
    failure of the first two steps is only reported through
    C{feedback_fn}; otherwise it aborts the operation.

    """
    instance = self.instance
    name = instance.name
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s", name, pnode)

    shutdown = self.rpc.call_instance_shutdown(pnode, instance)
    if shutdown.failed or not shutdown.data:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (name, pnode))
      feedback_fn("Warning: can't shutdown instance")

    logging.info("Removing block devices for instance %s", name)

    if not _RemoveDisks(self, instance):
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", name)

    self.cfg.RemoveInstance(name)
    # register the instance lock for removal as well
    self.remove_locks[locking.LEVEL_INSTANCE] = name
3030
3031
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  For every selected instance one row is returned, holding one value
  per requested output field. Locks are acquired and the nodes are
  contacted only when ExpandNames computes a true C{do_locking} from
  the requested fields.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  # Fields checked against the "static" set in ExpandNames; entries with
  # parentheses are patterns whose groups are evaluated in Exec.
  # NOTE(review): "admin_ram" and "vcpus" are listed here but have no
  # branch of their own in Exec; as far as can be seen from this class
  # they end up in the final ParameterError branch - confirm and fix.
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state", "admin_ram",
                                    "disk_template", "ip", "mac", "bridge",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    "(disk).(size)/([0-9]+)",
                                    "(disk).(sizes)",
                                    "(nic).(mac|ip|bridge)/([0-9]+)",
                                    "(nic).(macs|ips|bridges)",
                                    "(disk|nic).(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # Fields whose values Exec derives from the live data and the
  # bad/offline node lists
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    """Validate the output fields and declare the (shared) locks.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    # an empty name list selects all instances
    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # presumably NonMatching returns the requested fields which are not
    # static ones, i.e. locking is needed only for those - TODO confirm
    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks, but only if locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance (in NiceSort order of
        the names), each entry being the list of values for the
        requested output fields, in the order they were requested
    @raise errors.OpExecError: if, without locking, some explicitly
        requested instances are no longer in the configuration
    @raise errors.ParameterError: for a field no branch handles

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.do_locking:
      instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
    elif self.wanted != locking.ALL_SET:
      # no locks are held here, so the names need to be re-checked
      # against the configuration just read
      instance_names = self.wanted
      missing = set(instance_names).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some instances were removed before retrieving their data: %s"
          % missing)
    else:
      instance_names = all_info.keys()

    instance_names = utils.NiceSort(instance_names)
    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # primary nodes whose RPC result is flagged failed (bad_nodes) or
    # offline (off_nodes)
    bad_nodes = []
    off_nodes = []
    if self.do_locking:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed:
          bad_nodes.append(name)
        else:
          if result.data:
            live_data.update(result.data)
            # else no instance is alive
    else:
      # no node was contacted: every instance gets empty live data
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    for instance in instance_list:
      iout = []
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
      for field in self.op.output_fields:
        # only consulted by the pattern branch near the end of the chain
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = (instance.status != "down")
        elif field == "oper_state":
          # None: the state is not known, as the node query failed
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # combines the node state, the live state and the configured
          # state; the offline check comes before the failed check
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.status != "down":
                val = "running"
              else:
                # running, although configured as down
                val = "ERROR_up"
            else:
              if instance.status != "down":
                # not running, although not configured as down
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        # ip, bridge and mac always refer to the first NIC
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          # third character of the field name: 'a' -> index 0, 'b' -> index 1
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              # NOTE(review): the index is passed as the matched string,
              # not as an integer - presumably FindDisk converts it;
              # verify
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "bridges":
              val = [nic.bridge for nic in instance.nics]
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              # an index beyond the last NIC gives None, not an error
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "bridge":
                  val = instance.nics[nic_idx].bridge
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, "Unhandled variable parameter"
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output
3258
3259
3260 class LUFailoverInstance(LogicalUnit):
3261   """Failover an instance.
3262
3263   """
3264   HPATH = "instance-failover"
3265   HTYPE = constants.HTYPE_INSTANCE
3266   _OP_REQP = ["instance_name", "ignore_consistency"]
3267   REQ_BGL = False
3268
3269   def ExpandNames(self):
3270     self._ExpandAndLockInstance()
3271     self.needed_locks[locking.LEVEL_NODE] = []
3272     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3273
3274   def DeclareLocks(self, level):
3275     if level == locking.LEVEL_NODE:
3276       self._LockInstancesNodes()
3277
3278   def BuildHooksEnv(self):
3279     """Build hooks env.
3280
3281     This runs on master, primary and secondary nodes of the instance.
3282
3283     """
3284     env = {
3285       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3286       }
3287     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3288     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3289     return env, nl, nl
3290
3291   def CheckPrereq(self):
3292     """Check prerequisites.
3293
3294     This checks that the instance is in the cluster.
3295
3296     """
3297     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3298     assert self.instance is not None, \
3299       "Cannot retrieve locked instance %s" % self.op.instance_name
3300
3301     bep = self.cfg.GetClusterInfo().FillBE(instance)
3302     if instance.disk_template not in constants.DTS_NET_MIRROR:
3303       raise errors.OpPrereqError("Instance's disk layout is not"
3304                                  " network mirrored, cannot failover.")
3305
3306     secondary_nodes = instance.secondary_nodes
3307     if not secondary_nodes:
3308       raise errors.ProgrammerError("no secondary node but using "
3309                                    "a mirrored disk template")
3310
3311     target_node = secondary_nodes[0]
3312     _CheckNodeOnline(self, target_node)
3313     # check memory requirements on the secondary node
3314     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3315                          instance.name, bep[constants.BE_MEMORY],
3316                          instance.hypervisor)
3317
3318     # check bridge existance
3319     brlist = [nic.bridge for nic in instance.nics]
3320     result = self.rpc.call_bridges_exist(target_node, brlist)
3321     result.Raise()
3322     if not result.data:
3323       raise errors.OpPrereqError("One or more target bridges %s does not"
3324                                  " exist on destination node '%s'" %
3325                                  (brlist, target_node))
3326
3327   def Exec(self, feedback_fn):
3328     """Failover an instance.
3329
3330     The failover is done by shutting it down on its present node and
3331     starting it on the secondary.
3332
3333     """
3334     instance = self.instance
3335
3336     source_node = instance.primary_node
3337     target_node = instance.secondary_nodes[0]
3338
3339     feedback_fn("* checking disk consistency between source and target")
3340     for dev in instance.disks:
3341       # for drbd, these are drbd over lvm
3342       if not _CheckDiskConsistency(self, dev, target_node, False):
3343         if instance.status == "up" and not self.op.ignore_consistency:
3344           raise errors.OpExecError("Disk %s is degraded on target node,"
3345                                    " aborting failover." % dev.iv_name)
3346
3347     feedback_fn("* shutting down instance on source node")
3348     logging.info("Shutting down instance %s on node %s",
3349                  instance.name, source_node)
3350
3351     result = self.rpc.call_instance_shutdown(source_node, instance)
3352     if result.failed or not result.data:
3353       if self.op.ignore_consistency:
3354         self.proc.LogWarning("Could not shutdown instance %s on node %s."
3355                              " Proceeding"
3356                              " anyway. Please make sure node %s is down",
3357                              instance.name, source_node, source_node)
3358       else:
3359         raise errors.OpExecError("Could not shutdown instance %s on node %s" %
3360                                  (instance.name, source_node))
3361
3362     feedback_fn("* deactivating the instance's disks on source node")
3363     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3364       raise errors.OpExecError("Can't shut down the instance's disks.")
3365
3366     instance.primary_node = target_node
3367     # distribute new instance config to the other nodes
3368     self.cfg.Update(instance)
3369
3370     # Only start the instance if it's marked as up
3371     if instance.status == "up":
3372       feedback_fn("* activating the instance's disks on target node")
3373       logging.info("Starting instance %s on node %s",
3374                    instance.name, target_node)
3375
3376       disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3377                                                ignore_secondaries=True)
3378       if not disks_ok:
3379         _ShutdownInstanceDisks(self, instance)
3380         raise errors.OpExecError("Can't activate the instance's disks")
3381
3382       feedback_fn("* starting the instance on the target node")
3383       result = self.rpc.call_instance_start(target_node, instance, None)
3384       if result.failed or not result.data:
3385         _ShutdownInstanceDisks(self, instance)
3386         raise errors.OpExecError("Could not start instance %s on node %s." %
3387                                  (instance.name, target_node))
3388
3389
3390 class LUMigrateInstance(LogicalUnit):
3391   """Migrate an instance.
3392
3393   This is migration without shutting down, compared to the failover,
3394   which is done with shutdown.
3395
3396   """
3397   HPATH = "instance-migrate"
3398   HTYPE = constants.HTYPE_INSTANCE
3399   _OP_REQP = ["instance_name", "live", "cleanup"]
3400
3401   REQ_BGL = False
3402
3403   def ExpandNames(self):
3404     self._ExpandAndLockInstance()
3405     self.needed_locks[locking.LEVEL_NODE] = []
3406     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3407
3408   def DeclareLocks(self, level):
3409     if level == locking.LEVEL_NODE:
3410       self._LockInstancesNodes()
3411
3412   def BuildHooksEnv(self):
3413     """Build hooks env.
3414
3415     This runs on master, primary and secondary nodes of the instance.
3416
3417     """
3418     env = _BuildInstanceHookEnvByObject(self, self.instance)
3419     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3420     return env, nl, nl
3421
3422   def CheckPrereq(self):
3423     """Check prerequisites.
3424
3425     This checks that the instance is in the cluster.
3426
3427     """
3428     instance = self.cfg.GetInstanceInfo(
3429       self.cfg.ExpandInstanceName(self.op.instance_name))
3430     if instance is None:
3431       raise errors.OpPrereqError("Instance '%s' not known" %
3432                                  self.op.instance_name)
3433
3434     if instance.disk_template != constants.DT_DRBD8:
3435       raise errors.OpPrereqError("Instance's disk layout is not"
3436                                  " drbd8, cannot migrate.")
3437
3438     secondary_nodes = instance.secondary_nodes
3439     if not secondary_nodes:
3440       raise errors.ProgrammerError("no secondary node but using "
3441                                    "drbd8 disk template")
3442
3443     i_be = self.cfg.GetClusterInfo().FillBE(instance)
3444
3445     target_node = secondary_nodes[0]
3446     # check memory requirements on the secondary node
3447     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3448                          instance.name, i_be[constants.BE_MEMORY],
3449                          instance.hypervisor)
3450
3451     # check bridge existance
3452     brlist = [nic.bridge for nic in instance.nics]
3453     result = self.rpc.call_bridges_exist(target_node, brlist)
3454     if result.failed or not result.data:
3455       raise errors.OpPrereqError("One or more target bridges %s does not"
3456                                  " exist on destination node '%s'" %
3457                                  (brlist, target_node))
3458
3459     if not self.op.cleanup:
3460       result = self.rpc.call_instance_migratable(instance.primary_node,
3461                                                  instance)
3462       msg = result.RemoteFailMsg()
3463       if msg:
3464         raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3465                                    msg)
3466
3467     self.instance = instance
3468
3469   def _WaitUntilSync(self):
3470     """Poll with custom rpc for disk sync.
3471
3472     This uses our own step-based rpc call.
3473
3474     """
3475     self.feedback_fn("* wait until resync is done")
3476     all_done = False
3477     while not all_done:
3478       all_done = True
3479       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3480                                             self.nodes_ip,
3481                                             self.instance.disks)
3482       min_percent = 100
3483       for node, nres in result.items():
3484         msg = nres.RemoteFailMsg()
3485         if msg:
3486           raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3487                                    (node, msg))
3488         node_done, node_percent = nres.data[1]
3489         all_done = all_done and node_done
3490         if node_percent is not None:
3491           min_percent = min(min_percent, node_percent)
3492       if not all_done:
3493         if min_percent < 100:
3494           self.feedback_fn("   - progress: %.1f%%" % min_percent)
3495         time.sleep(2)
3496
3497   def _EnsureSecondary(self, node):
3498     """Demote a node to secondary.
3499
3500     """
3501     self.feedback_fn("* switching node %s to secondary mode" % node)
3502
3503     for dev in self.instance.disks:
3504       self.cfg.SetDiskID(dev, node)
3505
3506     result = self.rpc.call_blockdev_close(node, self.instance.name,
3507                                           self.instance.disks)
3508     msg = result.RemoteFailMsg()
3509     if msg:
3510       raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3511                                " error %s" % (node, msg))
3512
3513   def _GoStandalone(self):
3514     """Disconnect from the network.
3515
3516     """
3517     self.feedback_fn("* changing into standalone mode")
3518     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3519                                                self.instance.disks)
3520     for node, nres in result.items():
3521       msg = nres.RemoteFailMsg()
3522       if msg:
3523         raise errors.OpExecError("Cannot disconnect disks node %s,"
3524                                  " error %s" % (node, msg))
3525
3526   def _GoReconnect(self, multimaster):
3527     """Reconnect to the network.
3528
3529     """
3530     if multimaster:
3531       msg = "dual-master"
3532     else:
3533       msg = "single-master"
3534     self.feedback_fn("* changing disks into %s mode" % msg)
3535     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3536                                            self.instance.disks,
3537                                            self.instance.name, multimaster)
3538     for node, nres in result.items():
3539       msg = nres.RemoteFailMsg()
3540       if msg:
3541         raise errors.OpExecError("Cannot change disks config on node %s,"
3542                                  " error: %s" % (node, msg))
3543
3544   def _ExecCleanup(self):
3545     """Try to cleanup after a failed migration.
3546
3547     The cleanup is done by:
3548       - check that the instance is running only on one node
3549         (and update the config if needed)
3550       - change disks on its secondary node to secondary
3551       - wait until disks are fully synchronized
3552       - disconnect from the network
3553       - change disks into single-master mode
3554       - wait again until disks are fully synchronized
3555
3556     """
3557     instance = self.instance
3558     target_node = self.target_node
3559     source_node = self.source_node
3560
3561     # check running on only one node
3562     self.feedback_fn("* checking where the instance actually runs"
3563                      " (if this hangs, the hypervisor might be in"
3564                      " a bad state)")
3565     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3566     for node, result in ins_l.items():
3567       result.Raise()
3568       if not isinstance(result.data, list):
3569         raise errors.OpExecError("Can't contact node '%s'" % node)
3570
3571     runningon_source = instance.name in ins_l[source_node].data
3572     runningon_target = instance.name in ins_l[target_node].data
3573
3574     if runningon_source and runningon_target:
3575       raise errors.OpExecError("Instance seems to be running on two nodes,"
3576                                " or the hypervisor is confused. You will have"
3577                                " to ensure manually that it runs only on one"
3578                                " and restart this operation.")
3579
3580     if not (runningon_source or runningon_target):
3581       raise errors.OpExecError("Instance does not seem to be running at all."
3582                                " In this case, it's safer to repair by"
3583                                " running 'gnt-instance stop' to ensure disk"
3584                                " shutdown, and then restarting it.")
3585
3586     if runningon_target:
3587       # the migration has actually succeeded, we need to update the config
3588       self.feedback_fn("* instance running on secondary node (%s),"
3589                        " updating config" % target_node)
3590       instance.primary_node = target_node
3591       self.cfg.Update(instance)
3592       demoted_node = source_node
3593     else:
3594       self.feedback_fn("* instance confirmed to be running on its"
3595                        " primary node (%s)" % source_node)
3596       demoted_node = target_node
3597
3598     self._EnsureSecondary(demoted_node)
3599     try:
3600       self._WaitUntilSync()
3601     except errors.OpExecError:
3602       # we ignore here errors, since if the device is standalone, it
3603       # won't be able to sync
3604       pass
3605     self._GoStandalone()
3606     self._GoReconnect(False)
3607     self._WaitUntilSync()
3608
3609     self.feedback_fn("* done")
3610
3611   def _ExecMigration(self):
3612     """Migrate an instance.
3613
3614     The migrate is done by:
3615       - change the disks into dual-master mode
3616       - wait until disks are fully synchronized again
3617       - migrate the instance
3618       - change disks on the new secondary node (the old primary) to secondary
3619       - wait until disks are fully synchronized
3620       - change disks into single-master mode
3621
3622     """
3623     instance = self.instance
3624     target_node = self.target_node
3625     source_node = self.source_node
3626
3627     self.feedback_fn("* checking disk consistency between source and target")
3628     for dev in instance.disks:
3629       if not _CheckDiskConsistency(self, dev, target_node, False):
3630         raise errors.OpExecError("Disk %s is degraded or not fully"
3631                                  " synchronized on target node,"
3632                                  " aborting migrate." % dev.iv_name)
3633
3634     self._EnsureSecondary(target_node)
3635     self._GoStandalone()
3636     self._GoReconnect(True)
3637     self._WaitUntilSync()
3638
3639     self.feedback_fn("* migrating instance to %s" % target_node)
3640     time.sleep(10)
3641     result = self.rpc.call_instance_migrate(source_node, instance,
3642                                             self.nodes_ip[target_node],
3643                                             self.op.live)
3644     msg = result.RemoteFailMsg()
3645     if msg:
3646       logging.error("Instance migration failed, trying to revert"
3647                     " disk status: %s", msg)
3648       try:
3649         self._EnsureSecondary(target_node)
3650         self._GoStandalone()
3651         self._GoReconnect(False)
3652         self._WaitUntilSync()
3653       except errors.OpExecError, err:
3654         self.LogWarning("Migration failed and I can't reconnect the"
3655                         " drives: error '%s'\n"
3656                         "Please look and recover the instance status" %
3657                         str(err))
3658
3659       raise errors.OpExecError("Could not migrate instance %s: %s" %
3660                                (instance.name, msg))
3661     time.sleep(10)
3662
3663     instance.primary_node = target_node
3664     # distribute new instance config to the other nodes
3665     self.cfg.Update(instance)
3666
3667     self._EnsureSecondary(source_node)
3668     self._WaitUntilSync()
3669     self._GoStandalone()
3670     self._GoReconnect(False)
3671     self._WaitUntilSync()
3672
3673     self.feedback_fn("* done")
3674
3675   def Exec(self, feedback_fn):
3676     """Perform the migration.
3677
3678     """
3679     self.feedback_fn = feedback_fn
3680
3681     self.source_node = self.instance.primary_node
3682     self.target_node = self.instance.secondary_nodes[0]
3683     self.all_nodes = [self.source_node, self.target_node]
3684     self.nodes_ip = {
3685       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
3686       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
3687       }
3688     if self.op.cleanup:
3689       return self._ExecCleanup()
3690     else:
3691       return self._ExecMigration()
3692
3693
def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
  """Create a tree of block devices on the primary node.

  This always creates all devices: the children are created first
  (recursively), then the device itself.

  @param lu: the logical unit on whose behalf we execute
  @param node: the node on which the devices are created
  @param instance: the instance owning the device; its name is passed
      to the creation RPC
  @param device: the top-level device of the tree to create
  @param info: text passed unchanged to the creation RPC
  @rtype: boolean
  @return: True if the whole tree was created, False as soon as one
      creation fails

  """
  if device.children:
    for child in device.children:
      if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
        return False

  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, True, info)
  if result.failed or not result.data:
    return False
  if device.physical_id is None:
    # remember the id returned by the node, not the RPC result wrapper
    device.physical_id = result.data
  return True
3713
3714
def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
  """Create a tree of block devices on a secondary node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the logical unit on whose behalf we execute
  @param node: the node on which the devices are created
  @param instance: the instance owning the device; its name is passed
      to the creation RPC
  @param device: the top-level device of the tree to examine
  @param force: whether this device must be created even if its own
      CreateOnSecondary() answer is false
  @param info: text passed unchanged to the creation RPC
  @rtype: boolean
  @return: True if everything that had to be created was created, False
      as soon as one creation fails

  """
  if device.CreateOnSecondary():
    force = True
  if device.children:
    for child in device.children:
      if not _CreateBlockDevOnSecondary(lu, node, instance,
                                        child, force, info):
        return False

  if not force:
    return True
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, False, info)
  if result.failed or not result.data:
    return False
  if device.physical_id is None:
    # remember the id returned by the node, not the RPC result wrapper
    device.physical_id = result.data
  return True
3742
3743
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name for each of the given suffixes.

  For every entry of exts a new unique ID is requested from the
  configuration and the entry is appended to it.

  @param lu: the logical unit on whose behalf we execute
  @param exts: the suffixes to append, one per name to generate
  @rtype: list
  @return: the generated names, in the same order as exts

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(), suffix) for suffix in exts]
3755
3756
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration; the
  children are two logical volumes in the cluster volume group, named
  names[0] (with the requested size) and names[1] (with a fixed size of
  128).

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node stored in the drbd logical_id
  @param secondary: second node stored in the drbd logical_id
  @param size: size of the drbd device and of its first child
  @param names: sequence holding the names of the two logical volumes
  @param iv_name: the iv_name given to the drbd device
  @param p_minor: minor stored in the logical_id after the port
  @param s_minor: minor stored in the logical_id after p_minor
  @return: the new drbd8 L{objects.Disk}

  """
  drbd_port = lu.cfg.AllocatePort()
  vg = lu.cfg.GetVGName()
  secret = lu.cfg.GenerateDRBDSecret()

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vg, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vg, names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
3776
3777
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: one of the DT_DISKLESS, DT_PLAIN, DT_DRBD8 or
      DT_FILE constants
  @param instance_name: name of the instance; used when allocating the
      DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must hold exactly one
      node for drbd8 and be empty for plain and file
  @param disk_info: list of dicts, one per disk, each with a "size" key
  @param file_storage_dir: directory of the disk files (file template)
  @param file_driver: first element of the logical_id of file disks
  @param base_index: index of the first generated disk; all names
      derived from the disk position (iv_name, LV name suffix, file
      name) are offset by it, so that disks added to an instance that
      already has some do not reuse the names of the existing ones
  @rtype: list
  @return: the list of generated L{objects.Disk} objects (empty for the
      diskless template)
  @raise errors.ProgrammerError: if the template is unknown or the
      number of secondary nodes does not match the template

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index)
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk, in (primary, secondary) order
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two names per disk, in (data, meta) order
    names = _GenerateUniqueNames(lu,
                                 [".disk%d_%s" % (base_index + i, s)
                                  for i in range(disk_count)
                                  for s in ("data", "meta")
                                  ])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # the file name must use the real disk index, otherwise a disk
      # added later would map to the file of an already existing disk
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)))
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
3839
3840
def _GetInstanceInfoText(instance):
  """Build the text to be added to the metadata of an instance's disks.

  @param instance: object whose name attribute identifies the instance
  @return: the string "originstname+" followed by the instance name

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
3846
3847
def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based
  instances the storage directory (taken from the first disk) is created
  on the primary node first. Then each disk is created on all secondary
  nodes and finally on the primary node; the first failure aborts the
  whole operation, without removing what was already created.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info_text = _GetInstanceInfoText(instance)

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
                                                     storage_dir)

    if dir_result.failed or not dir_result.data:
      logging.error("Could not connect to node '%s'", instance.primary_node)
      return False
    elif not dir_result.data[0]:
      logging.error("Failed to create directory '%s'", storage_dir)
      return False

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for disk in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    #HARDCODE
    for snode in instance.secondary_nodes:
      created = _CreateBlockDevOnSecondary(lu, snode, instance,
                                           disk, False, info_text)
      if not created:
        logging.error("Failed to create volume %s (%s) on secondary node %s!",
                      disk.iv_name, disk, snode)
        return False
    #HARDCODE
    created = _CreateBlockDevOnPrimary(lu, instance.primary_node,
                                       instance, disk, info_text)
    if not created:
      logging.error("Failed to create volume %s on primary!", disk.iv_name)
      return False

  return True
3895
3896
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the storage directory (taken from the first
  disk) is removed from the primary node at the end.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal: True only if every block device
      (and the storage directory, if any) was removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  # overall status; kept separate from the per-call RPC results so that
  # a later success cannot hide an earlier failure
  all_ok = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      rm_result = lu.rpc.call_blockdev_remove(node, disk)
      if rm_result.failed or not rm_result.data:
        lu.proc.LogWarning("Could not remove block device %s on node %s,"
                           " continuing anyway", device.iv_name, node)
        all_ok = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                     file_storage_dir)
    if dir_result.failed or not dir_result.data:
      logging.error("Could not remove directory '%s'", file_storage_dir)
      all_ok = False

  return all_ok
3934
3935
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: one of the DT_DISKLESS, DT_PLAIN, DT_DRBD8 or
      DT_FILE constants
  @param disks: iterable of dicts, each with a "size" key
  @return: None for the diskless and file templates, otherwise the sum
      of the disk sizes (plus 128 per disk for drbd8)
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # Required free space for every known template; all entries are
  # computed up front, whatever template was asked for
  needed_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  try:
    return needed_by_template[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
3954
3955
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation, to be
  used in both instance create and instance modify. The validation RPC
  is sent to all the given nodes at once; each answer must be a
  non-empty tuple or list whose first element tells whether the
  validation succeeded and whose second element is used as the error
  text otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  validation = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                      hvname,
                                                      hvparams)
  for node_name in nodenames:
    node_res = validation[node_name]
    node_res.Raise()
    payload = node_res.data
    if not (payload and isinstance(payload, (tuple, list))):
      raise errors.OpPrereqError("Cannot get current information"
                                 " from node '%s' (%s)" %
                                 (node_name, payload))
    if not payload[0]:
      raise errors.OpPrereqError("Hypervisor parameter validation failed:"
                                 " %s" % payload[1])
3985
3986
3987 class LUCreateInstance(LogicalUnit):
3988   """Create an instance.
3989
3990   """
3991   HPATH = "instance-add"
3992   HTYPE = constants.HTYPE_INSTANCE
3993   _OP_REQP = ["instance_name", "disks", "disk_template",
3994               "mode", "start",
3995               "wait_for_sync", "ip_check", "nics",
3996               "hvparams", "beparams"]
3997   REQ_BGL = False
3998
3999   def _ExpandNode(self, node):
4000     """Expands and checks one node name.
4001
4002     """
4003     node_full = self.cfg.ExpandNodeName(node)
4004     if node_full is None:
4005       raise errors.OpPrereqError("Unknown node %s" % node)
4006     return node_full
4007
4008   def ExpandNames(self):
4009     """ExpandNames for CreateInstance.
4010
4011     Figure out the right locks for instance creation.
4012
4013     """
4014     self.needed_locks = {}
4015
4016     # set optional parameters to none if they don't exist
4017     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4018       if not hasattr(self.op, attr):
4019         setattr(self.op, attr, None)
4020
4021     # cheap checks, mostly valid constants given
4022
4023     # verify creation mode
4024     if self.op.mode not in (constants.INSTANCE_CREATE,
4025                             constants.INSTANCE_IMPORT):
4026       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4027                                  self.op.mode)
4028
4029     # disk template and mirror node verification
4030     if self.op.disk_template not in constants.DISK_TEMPLATES:
4031       raise errors.OpPrereqError("Invalid disk template name")
4032
4033     if self.op.hypervisor is None:
4034       self.op.hypervisor = self.cfg.GetHypervisorType()
4035
4036     cluster = self.cfg.GetClusterInfo()
4037     enabled_hvs = cluster.enabled_hypervisors
4038     if self.op.hypervisor not in enabled_hvs:
4039       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4040                                  " cluster (%s)" % (self.op.hypervisor,
4041                                   ",".join(enabled_hvs)))
4042
4043     # check hypervisor parameter syntax (locally)
4044
4045     filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
4046                                   self.op.hvparams)
4047     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4048     hv_type.CheckParameterSyntax(filled_hvp)
4049
4050     # fill and remember the beparams dict
4051     utils.CheckBEParams(self.op.beparams)
4052     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
4053                                     self.op.beparams)
4054
4055     #### instance parameters check
4056
4057     # instance name verification
4058     hostname1 = utils.HostInfo(self.op.instance_name)
4059     self.op.instance_name = instance_name = hostname1.name
4060
4061     # this is just a preventive check, but someone might still add this
4062     # instance in the meantime, and creation will fail at lock-add time
4063     if instance_name in self.cfg.GetInstanceList():
4064       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4065                                  instance_name)
4066
4067     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4068
4069     # NIC buildup
4070     self.nics = []
4071     for nic in self.op.nics:
4072       # ip validity checks
4073       ip = nic.get("ip", None)
4074       if ip is None or ip.lower() == "none":
4075         nic_ip = None
4076       elif ip.lower() == constants.VALUE_AUTO:
4077         nic_ip = hostname1.ip
4078       else:
4079         if not utils.IsValidIP(ip):
4080           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4081                                      " like a valid IP" % ip)
4082         nic_ip = ip
4083
4084       # MAC address verification
4085       mac = nic.get("mac", constants.VALUE_AUTO)
4086       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4087         if not utils.IsValidMac(mac.lower()):
4088           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4089                                      mac)
4090       # bridge verification
4091       bridge = nic.get("bridge", self.cfg.GetDefBridge())
4092       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
4093
4094     # disk checks/pre-build
4095     self.disks = []
4096     for disk in self.op.disks:
4097       mode = disk.get("mode", constants.DISK_RDWR)
4098       if mode not in constants.DISK_ACCESS_SET:
4099         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4100                                    mode)
4101       size = disk.get("size", None)
4102       if size is None:
4103         raise errors.OpPrereqError("Missing disk size")
4104       try:
4105         size = int(size)
4106       except ValueError:
4107         raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4108       self.disks.append({"size": size, "mode": mode})
4109
4110     # used in CheckPrereq for ip ping check
4111     self.check_ip = hostname1.ip
4112
4113     # file storage checks
4114     if (self.op.file_driver and
4115         not self.op.file_driver in constants.FILE_DRIVER):
4116       raise errors.OpPrereqError("Invalid file driver name '%s'" %
4117                                  self.op.file_driver)
4118
4119     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4120       raise errors.OpPrereqError("File storage directory path not absolute")
4121
4122     ### Node/iallocator related checks
4123     if [self.op.iallocator, self.op.pnode].count(None) != 1:
4124       raise errors.OpPrereqError("One and only one of iallocator and primary"
4125                                  " node must be given")
4126
4127     if self.op.iallocator:
4128       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4129     else:
4130       self.op.pnode = self._ExpandNode(self.op.pnode)
4131       nodelist = [self.op.pnode]
4132       if self.op.snode is not None:
4133         self.op.snode = self._ExpandNode(self.op.snode)
4134         nodelist.append(self.op.snode)
4135       self.needed_locks[locking.LEVEL_NODE] = nodelist
4136
4137     # in case of import lock the source node too
4138     if self.op.mode == constants.INSTANCE_IMPORT:
4139       src_node = getattr(self.op, "src_node", None)
4140       src_path = getattr(self.op, "src_path", None)
4141
4142       if src_path is None:
4143         self.op.src_path = src_path = self.op.instance_name
4144
4145       if src_node is None:
4146         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4147         self.op.src_node = None
4148         if os.path.isabs(src_path):
4149           raise errors.OpPrereqError("Importing an instance from an absolute"
4150                                      " path requires a source node option.")
4151       else:
4152         self.op.src_node = src_node = self._ExpandNode(src_node)
4153         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4154           self.needed_locks[locking.LEVEL_NODE].append(src_node)
4155         if not os.path.isabs(src_path):
4156           self.op.src_path = src_path = \
4157             os.path.join(constants.EXPORT_DIR, src_path)
4158
4159     else: # INSTANCE_CREATE
4160       if getattr(self.op, "os_type", None) is None:
4161         raise errors.OpPrereqError("No guest OS specified")
4162
4163   def _RunAllocator(self):
4164     """Run the allocator based on input opcode.
4165
4166     """
4167     nics = [n.ToDict() for n in self.nics]
4168     ial = IAllocator(self,
4169                      mode=constants.IALLOCATOR_MODE_ALLOC,
4170                      name=self.op.instance_name,
4171                      disk_template=self.op.disk_template,
4172                      tags=[],
4173                      os=self.op.os_type,
4174                      vcpus=self.be_full[constants.BE_VCPUS],
4175                      mem_size=self.be_full[constants.BE_MEMORY],
4176                      disks=self.disks,
4177                      nics=nics,
4178                      hypervisor=self.op.hypervisor,
4179                      )
4180
4181     ial.Run(self.op.iallocator)
4182
4183     if not ial.success:
4184       raise errors.OpPrereqError("Can't compute nodes using"
4185                                  " iallocator '%s': %s" % (self.op.iallocator,
4186                                                            ial.info))
4187     if len(ial.nodes) != ial.required_nodes:
4188       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4189                                  " of nodes (%s), required %s" %
4190                                  (self.op.iallocator, len(ial.nodes),
4191                                   ial.required_nodes))
4192     self.op.pnode = ial.nodes[0]
4193     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4194                  self.op.instance_name, self.op.iallocator,
4195                  ", ".join(ial.nodes))
4196     if ial.required_nodes == 2:
4197       self.op.snode = ial.nodes[1]
4198
4199   def BuildHooksEnv(self):
4200     """Build hooks env.
4201
4202     This runs on master, primary and secondary nodes of the instance.
4203
4204     """
4205     env = {
4206       "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
4207       "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
4208       "INSTANCE_ADD_MODE": self.op.mode,
4209       }
4210     if self.op.mode == constants.INSTANCE_IMPORT:
4211       env["INSTANCE_SRC_NODE"] = self.op.src_node
4212       env["INSTANCE_SRC_PATH"] = self.op.src_path
4213       env["INSTANCE_SRC_IMAGES"] = self.src_images
4214
4215     env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
4216       primary_node=self.op.pnode,
4217       secondary_nodes=self.secondaries,
4218       status=self.instance_status,
4219       os_type=self.op.os_type,
4220       memory=self.be_full[constants.BE_MEMORY],
4221       vcpus=self.be_full[constants.BE_VCPUS],
4222       nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
4223     ))
4224
4225     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4226           self.secondaries)
4227     return env, nl, nl
4228
4229
4230   def CheckPrereq(self):
4231     """Check prerequisites.
4232
4233     """
4234     if (not self.cfg.GetVGName() and
4235         self.op.disk_template not in constants.DTS_NOT_LVM):
4236       raise errors.OpPrereqError("Cluster does not support lvm-based"
4237                                  " instances")
4238
4239
4240     if self.op.mode == constants.INSTANCE_IMPORT:
4241       src_node = self.op.src_node
4242       src_path = self.op.src_path
4243
4244       if src_node is None:
4245         exp_list = self.rpc.call_export_list(
4246           self.acquired_locks[locking.LEVEL_NODE])
4247         found = False
4248         for node in exp_list:
4249           if not exp_list[node].failed and src_path in exp_list[node].data:
4250             found = True
4251             self.op.src_node = src_node = node
4252             self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4253                                                        src_path)
4254             break
4255         if not found:
4256           raise errors.OpPrereqError("No export found for relative path %s" %
4257                                       src_path)
4258
4259       _CheckNodeOnline(self, src_node)
4260       result = self.rpc.call_export_info(src_node, src_path)
4261       result.Raise()
4262       if not result.data:
4263         raise errors.OpPrereqError("No export found in dir %s" % src_path)
4264
4265       export_info = result.data
4266       if not export_info.has_section(constants.INISECT_EXP):
4267         raise errors.ProgrammerError("Corrupted export config")
4268
4269       ei_version = export_info.get(constants.INISECT_EXP, 'version')
4270       if (int(ei_version) != constants.EXPORT_VERSION):
4271         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4272                                    (ei_version, constants.EXPORT_VERSION))
4273
4274       # Check that the new instance doesn't have less disks than the export
4275       instance_disks = len(self.disks)
4276       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4277       if instance_disks < export_disks:
4278         raise errors.OpPrereqError("Not enough disks to import."
4279                                    " (instance: %d, export: %d)" %
4280                                    (instance_disks, export_disks))
4281
4282       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4283       disk_images = []
4284       for idx in range(export_disks):
4285         option = 'disk%d_dump' % idx
4286         if export_info.has_option(constants.INISECT_INS, option):
4287           # FIXME: are the old os-es, disk sizes, etc. useful?
4288           export_name = export_info.get(constants.INISECT_INS, option)
4289           image = os.path.join(src_path, export_name)
4290           disk_images.append(image)
4291         else:
4292           disk_images.append(False)
4293
4294       self.src_images = disk_images
4295
4296       old_name = export_info.get(constants.INISECT_INS, 'name')
4297       # FIXME: int() here could throw a ValueError on broken exports
4298       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4299       if self.op.instance_name == old_name:
4300         for idx, nic in enumerate(self.nics):
4301           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4302             nic_mac_ini = 'nic%d_mac' % idx
4303             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4304
4305     # ip ping checks (we use the same ip that was resolved in ExpandNames)
4306     if self.op.start and not self.op.ip_check:
4307       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4308                                  " adding an instance in start mode")
4309
4310     if self.op.ip_check:
4311       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4312         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4313                                    (self.check_ip, self.op.instance_name))
4314
4315     #### allocator run
4316
4317     if self.op.iallocator is not None:
4318       self._RunAllocator()
4319
4320     #### node related checks
4321
4322     # check primary node
4323     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4324     assert self.pnode is not None, \
4325       "Cannot retrieve locked node %s" % self.op.pnode
4326     if pnode.offline:
4327       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4328                                  pnode.name)
4329
4330     self.secondaries = []
4331
4332     # mirror node verification
4333     if self.op.disk_template in constants.DTS_NET_MIRROR:
4334       if self.op.snode is None:
4335         raise errors.OpPrereqError("The networked disk templates need"
4336                                    " a mirror node")
4337       if self.op.snode == pnode.name:
4338         raise errors.OpPrereqError("The secondary node cannot be"
4339                                    " the primary node.")
4340       self.secondaries.append(self.op.snode)
4341       _CheckNodeOnline(self, self.op.snode)
4342
4343     nodenames = [pnode.name] + self.secondaries
4344
4345     req_size = _ComputeDiskSize(self.op.disk_template,
4346                                 self.disks)
4347
4348     # Check lv size requirements
4349     if req_size is not None:
4350       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4351                                          self.op.hypervisor)
4352       for node in nodenames:
4353         info = nodeinfo[node]
4354         info.Raise()
4355         info = info.data
4356         if not info:
4357           raise errors.OpPrereqError("Cannot get current information"
4358                                      " from node '%s'" % node)
4359         vg_free = info.get('vg_free', None)
4360         if not isinstance(vg_free, int):
4361           raise errors.OpPrereqError("Can't compute free disk space on"
4362                                      " node %s" % node)
4363         if req_size > info['vg_free']:
4364           raise errors.OpPrereqError("Not enough disk space on target node %s."
4365                                      " %d MB available, %d MB required" %
4366                                      (node, info['vg_free'], req_size))
4367
4368     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4369
4370     # os verification
4371     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4372     result.Raise()
4373     if not isinstance(result.data, objects.OS):
4374       raise errors.OpPrereqError("OS '%s' not in supported os list for"
4375                                  " primary node"  % self.op.os_type)
4376
4377     # bridge check on primary node
4378     bridges = [n.bridge for n in self.nics]
4379     result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
4380     result.Raise()
4381     if not result.data:
4382       raise errors.OpPrereqError("One of the target bridges '%s' does not"
4383                                  " exist on destination node '%s'" %
4384                                  (",".join(bridges), pnode.name))
4385
4386     # memory check on primary node
4387     if self.op.start:
4388       _CheckNodeFreeMemory(self, self.pnode.name,
4389                            "creating instance %s" % self.op.instance_name,
4390                            self.be_full[constants.BE_MEMORY],
4391                            self.op.hypervisor)
4392
4393     if self.op.start:
4394       self.instance_status = 'up'
4395     else:
4396       self.instance_status = 'down'
4397
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are, in order: fill in MAC addresses still set to
    auto/generate, allocate a network port if the hypervisor is in
    HTS_REQ_PORT, generate and create the disks, add the instance to
    the configuration, release node locks, wait for (or check) disk
    sync, run the OS create or import scripts and optionally start the
    instance.

    @param feedback_fn: callable invoked with progress message strings

    Raises OpExecError if disk creation fails, if the disks are found
    degraded, if the OS create script fails or if the instance cannot
    be started; ProgrammerError on an unknown creation mode.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # replace the auto/generate placeholders with generated MACs
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    # only hypervisors listed in HTS_REQ_PORT get a port allocated
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # note: the instance object stores the opcode-supplied beparams and
    # hvparams, not the cluster-filled dictionaries
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            status=self.instance_status,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    if not _CreateDisks(self, iobj):
      # roll back whatever was created before reporting the failure
      _RemoveDisks(self, iobj)
      self.cfg.ReleaseDRBDMinors(instance)
      raise errors.OpExecError("Device creation failed, reverting...")

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Remove the temp. assignments for the instance's drbds
    self.cfg.ReleaseDRBDMinors(instance)
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # for imports the source node lock is kept; every other node lock
      # is released
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      # fixed 15 second pause before the one-shot status check
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo both the disks and the configuration entry added above
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    # diskless instances skip the OS create/import scripts entirely
    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj)
        result.Raise()
        if not result.data:
          raise errors.OpExecError("Could not add os for instance %s"
                                   " on node %s" %
                                   (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        import_result.Raise()
        # a failed per-disk import is only logged as a warning; it does
        # not abort the operation
        for idx, result in enumerate(import_result.data):
          if not result:
            self.LogWarning("Could not import the image %s for instance"
                            " %s, disk %d, on node %s" %
                            (src_images[idx], instance, idx, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not start instance")
4532
4533
class LUConnectConsole(NoHooksLU):
  """Compute the command needed to reach an instance's console.

  Unlike most logical units, this one does not perform the action
  itself: its result is the command line that has to be run on the
  master node in order to connect to the console.

  """
  REQ_BGL = False
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    """Expand the instance name and lock the instance."""
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance in the configuration and verifies that
    its primary node is online.

    """
    locked_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(locked_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Build the console connection command for the instance.

    Raises OpExecError if the instance is not in the list of instances
    reported by its primary node.

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it has for this hypervisor
    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise()

    if inst.name not in running.data:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    shell_cmd = hv_class.GetShellCommandForConsole(inst)

    # wrap the hypervisor-specific command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", shell_cmd, batch=True, tty=True)
4580
4581
4582 class LUReplaceDisks(LogicalUnit):
4583   """Replace the disks of an instance.
4584
4585   """
4586   HPATH = "mirrors-replace"
4587   HTYPE = constants.HTYPE_INSTANCE
4588   _OP_REQP = ["instance_name", "mode", "disks"]
4589   REQ_BGL = False
4590
4591   def CheckArguments(self):
4592     if not hasattr(self.op, "remote_node"):
4593       self.op.remote_node = None
4594     if not hasattr(self.op, "iallocator"):
4595       self.op.iallocator = None
4596
4597     # check for valid parameter combination
4598     cnt = [self.op.remote_node, self.op.iallocator].count(None)
4599     if self.op.mode == constants.REPLACE_DISK_CHG:
4600       if cnt == 2:
4601         raise errors.OpPrereqError("When changing the secondary either an"
4602                                    " iallocator script must be used or the"
4603                                    " new node given")
4604       elif cnt == 0:
4605         raise errors.OpPrereqError("Give either the iallocator or the new"
4606                                    " secondary, not both")
4607     else: # not replacing the secondary
4608       if cnt != 2:
4609         raise errors.OpPrereqError("The iallocator and new node options can"
4610                                    " be used only when changing the"
4611                                    " secondary node")
4612
4613   def ExpandNames(self):
4614     self._ExpandAndLockInstance()
4615
4616     if self.op.iallocator is not None:
4617       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4618     elif self.op.remote_node is not None:
4619       remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
4620       if remote_node is None:
4621         raise errors.OpPrereqError("Node '%s' not known" %
4622                                    self.op.remote_node)
4623       self.op.remote_node = remote_node
4624       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
4625       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4626     else:
4627       self.needed_locks[locking.LEVEL_NODE] = []
4628       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4629
4630   def DeclareLocks(self, level):
4631     # If we're not already locking all nodes in the set we have to declare the
4632     # instance's primary/secondary nodes.
4633     if (level == locking.LEVEL_NODE and
4634         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
4635       self._LockInstancesNodes()
4636
4637   def _RunAllocator(self):
4638     """Compute a new secondary node using an IAllocator.
4639
4640     """
4641     ial = IAllocator(self,
4642                      mode=constants.IALLOCATOR_MODE_RELOC,
4643                      name=self.op.instance_name,
4644                      relocate_from=[self.sec_node])
4645
4646     ial.Run(self.op.iallocator)
4647
4648     if not ial.success:
4649       raise errors.OpPrereqError("Can't compute nodes using"
4650                                  " iallocator '%s': %s" % (self.op.iallocator,
4651                                                            ial.info))
4652     if len(ial.nodes) != ial.required_nodes:
4653       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4654                                  " of nodes (%s), required %s" %
4655                                  (len(ial.nodes), ial.required_nodes))
4656     self.op.remote_node = ial.nodes[0]
4657     self.LogInfo("Selected new secondary for the instance: %s",
4658                  self.op.remote_node)
4659
4660   def BuildHooksEnv(self):
4661     """Build hooks env.
4662
4663     This runs on the master, the primary and all the secondaries.
4664
4665     """
4666     env = {
4667       "MODE": self.op.mode,
4668       "NEW_SECONDARY": self.op.remote_node,
4669       "OLD_SECONDARY": self.instance.secondary_nodes[0],
4670       }
4671     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4672     nl = [
4673       self.cfg.GetMasterNode(),
4674       self.instance.primary_node,
4675       ]
4676     if self.op.remote_node is not None:
4677       nl.append(self.op.remote_node)
4678     return env, nl, nl
4679
4680   def CheckPrereq(self):
4681     """Check prerequisites.
4682
4683     This checks that the instance is in the cluster.
4684
4685     """
4686     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4687     assert instance is not None, \
4688       "Cannot retrieve locked instance %s" % self.op.instance_name
4689     self.instance = instance
4690
4691     if instance.disk_template != constants.DT_DRBD8:
4692       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
4693                                  " instances")
4694
4695     if len(instance.secondary_nodes) != 1:
4696       raise errors.OpPrereqError("The instance has a strange layout,"
4697                                  " expected one secondary but found %d" %
4698                                  len(instance.secondary_nodes))
4699
4700     self.sec_node = instance.secondary_nodes[0]
4701
4702     if self.op.iallocator is not None:
4703       self._RunAllocator()
4704
4705     remote_node = self.op.remote_node
4706     if remote_node is not None:
4707       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
4708       assert self.remote_node_info is not None, \
4709         "Cannot retrieve locked node %s" % remote_node
4710     else:
4711       self.remote_node_info = None
4712     if remote_node == instance.primary_node:
4713       raise errors.OpPrereqError("The specified node is the primary node of"
4714                                  " the instance.")
4715     elif remote_node == self.sec_node:
4716       raise errors.OpPrereqError("The specified node is already the"
4717                                  " secondary node of the instance.")
4718
4719     if self.op.mode == constants.REPLACE_DISK_PRI:
4720       n1 = self.tgt_node = instance.primary_node
4721       n2 = self.oth_node = self.sec_node
4722     elif self.op.mode == constants.REPLACE_DISK_SEC:
4723       n1 = self.tgt_node = self.sec_node
4724       n2 = self.oth_node = instance.primary_node
4725     elif self.op.mode == constants.REPLACE_DISK_CHG:
4726       n1 = self.new_node = remote_node
4727       n2 = self.oth_node = instance.primary_node
4728       self.tgt_node = self.sec_node
4729     else:
4730       raise errors.ProgrammerError("Unhandled disk replace mode")
4731
4732     _CheckNodeOnline(self, n1)
4733     _CheckNodeOnline(self, n2)
4734
4735     if not self.op.disks:
4736       self.op.disks = range(len(instance.disks))
4737
4738     for disk_idx in self.op.disks:
4739       instance.FindDisk(disk_idx)
4740
4741   def _ExecD8DiskOnly(self, feedback_fn):
4742     """Replace a disk on the primary or secondary for dbrd8.
4743
4744     The algorithm for replace is quite complicated:
4745
4746       1. for each disk to be replaced:
4747
4748         1. create new LVs on the target node with unique names
4749         1. detach old LVs from the drbd device
4750         1. rename old LVs to name_replaced.<time_t>
4751         1. rename new LVs to old LVs
4752         1. attach the new LVs (with the old names now) to the drbd device
4753
4754       1. wait for sync across all devices
4755
4756       1. for each modified disk:
4757
4758         1. remove old LVs (which have the name name_replaces.<time_t>)
4759
4760     Failures are not very well handled.
4761
4762     """
4763     steps_total = 6
4764     warning, info = (self.proc.LogWarning, self.proc.LogInfo)
4765     instance = self.instance
4766     iv_names = {}
4767     vgname = self.cfg.GetVGName()
4768     # start of work
4769     cfg = self.cfg
4770     tgt_node = self.tgt_node
4771     oth_node = self.oth_node
4772
4773     # Step: check device activation
4774     self.proc.LogStep(1, steps_total, "check device existence")
4775     info("checking volume groups")
4776     my_vg = cfg.GetVGName()
4777     results = self.rpc.call_vg_list([oth_node, tgt_node])
4778     if not results:
4779       raise errors.OpExecError("Can't list volume groups on the nodes")
4780     for node in oth_node, tgt_node:
4781       res = results[node]
4782       if res.failed or not res.data or my_vg not in res.data:
4783         raise errors.OpExecError("Volume group '%s' not found on %s" %
4784                                  (my_vg, node))
4785     for idx, dev in enumerate(instance.disks):
4786       if idx not in self.op.disks:
4787         continue
4788       for node in tgt_node, oth_node:
4789         info("checking disk/%d on %s" % (idx, node))
4790         cfg.SetDiskID(dev, node)
4791         if not self.rpc.call_blockdev_find(node, dev):
4792           raise errors.OpExecError("Can't find disk/%d on node %s" %
4793                                    (idx, node))
4794
4795     # Step: check other node consistency
4796     self.proc.LogStep(2, steps_total, "check peer consistency")
4797     for idx, dev in enumerate(instance.disks):
4798       if idx not in self.op.disks:
4799         continue
4800       info("checking disk/%d consistency on %s" % (idx, oth_node))
4801       if not _CheckDiskConsistency(self, dev, oth_node,
4802                                    oth_node==instance.primary_node):
4803         raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
4804                                  " to replace disks on this node (%s)" %
4805                                  (oth_node, tgt_node))
4806
4807     # Step: create new storage
4808     self.proc.LogStep(3, steps_total, "allocate new storage")
4809     for idx, dev in enumerate(instance.disks):
4810       if idx not in self.op.disks:
4811         continue
4812       size = dev.size
4813       cfg.SetDiskID(dev, tgt_node)
4814       lv_names = [".disk%d_%s" % (idx, suf)
4815                   for suf in ["data", "meta"]]
4816       names = _GenerateUniqueNames(self, lv_names)
4817       lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4818                              logical_id=(vgname, names[0]))
4819       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4820                              logical_id=(vgname, names[1]))
4821       new_lvs = [lv_data, lv_meta]
4822       old_lvs = dev.children
4823       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
4824       info("creating new local storage on %s for %s" %
4825            (tgt_node, dev.iv_name))
4826       # since we *always* want to create this LV, we use the
4827       # _Create...OnPrimary (which forces the creation), even if we
4828       # are talking about the secondary node
4829       for new_lv in new_lvs:
4830         if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
4831                                         _GetInstanceInfoText(instance)):
4832           raise errors.OpExecError("Failed to create new LV named '%s' on"
4833                                    " node '%s'" %
4834                                    (new_lv.logical_id[1], tgt_node))
4835
4836     # Step: for each lv, detach+rename*2+attach
4837     self.proc.LogStep(4, steps_total, "change drbd configuration")
4838     for dev, old_lvs, new_lvs in iv_names.itervalues():
4839       info("detaching %s drbd from local storage" % dev.iv_name)
4840       result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
4841       result.Raise()
4842       if not result.data:
4843         raise errors.OpExecError("Can't detach drbd from local storage on node"
4844                                  " %s for device %s" % (tgt_node, dev.iv_name))
4845       #dev.children = []
4846       #cfg.Update(instance)
4847
4848       # ok, we created the new LVs, so now we know we have the needed
4849       # storage; as such, we proceed on the target node to rename
4850       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
4851       # using the assumption that logical_id == physical_id (which in
4852       # turn is the unique_id on that node)
4853
4854       # FIXME(iustin): use a better name for the replaced LVs
4855       temp_suffix = int(time.time())
4856       ren_fn = lambda d, suff: (d.physical_id[0],
4857                                 d.physical_id[1] + "_replaced-%s" % suff)
4858       # build the rename list based on what LVs exist on the node
4859       rlist = []
4860       for to_ren in old_lvs:
4861         find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
4862         if not find_res.failed and find_res.data is not None: # device exists
4863           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
4864
4865       info("renaming the old LVs on the target node")
4866       result = self.rpc.call_blockdev_rename(tgt_node, rlist)
4867       result.Raise()
4868       if not result.data:
4869         raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
4870       # now we rename the new LVs to the old LVs
4871       info("renaming the new LVs on the target node")
4872       rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
4873       result = self.rpc.call_blockdev_rename(tgt_node, rlist)
4874       result.Raise()
4875       if not result.data:
4876         raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
4877
4878       for old, new in zip(old_lvs, new_lvs):
4879         new.logical_id = old.logical_id
4880         cfg.SetDiskID(new, tgt_node)
4881
4882       for disk in old_lvs:
4883         disk.logical_id = ren_fn(disk, temp_suffix)
4884         cfg.SetDiskID(disk, tgt_node)
4885
4886       # now that the new lvs have the old name, we can add them to the device
4887       info("adding new mirror component on %s" % tgt_node)
4888       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
4889       if result.failed or not result.data:
4890         for new_lv in new_lvs:
4891           result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
4892           if result.failed or not result.data:
4893             warning("Can't rollback device %s", hint="manually cleanup unused"
4894                     " logical volumes")
4895         raise errors.OpExecError("Can't add local storage to drbd")
4896
4897       dev.children = new_lvs
4898       cfg.Update(instance)
4899
4900     # Step: wait for sync
4901
4902     # this can fail as the old devices are degraded and _WaitForSync
4903     # does a combined result over all disks, so we don't check its
4904     # return value
4905     self.proc.LogStep(5, steps_total, "sync devices")
4906     _WaitForSync(self, instance, unlock=True)
4907
4908     # so check manually all the devices
4909     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
4910       cfg.SetDiskID(dev, instance.primary_node)
4911       result = self.rpc.call_blockdev_find(instance.primary_node, dev)
4912       if result.failed or result.data[5]:
4913         raise errors.OpExecError("DRBD device %s is degraded!" % name)
4914
4915     # Step: remove old storage
4916     self.proc.LogStep(6, steps_total, "removing old storage")
4917     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
4918       info("remove logical volumes for %s" % name)
4919       for lv in old_lvs:
4920         cfg.SetDiskID(lv, tgt_node)
4921         result = self.rpc.call_blockdev_remove(tgt_node, lv)
4922         if result.failed or not result.data:
4923           warning("Can't remove old LV", hint="manually remove unused LVs")
4924           continue
4925
4926   def _ExecD8Secondary(self, feedback_fn):
4927     """Replace the secondary node for drbd8.
4928
4929     The algorithm for replace is quite complicated:
4930       - for all disks of the instance:
4931         - create new LVs on the new node with same names
4932         - shutdown the drbd device on the old secondary
4933         - disconnect the drbd network on the primary
4934         - create the drbd device on the new secondary
4935         - network attach the drbd on the primary, using an artifice:
4936           the drbd code for Attach() will connect to the network if it
4937           finds a device which is connected to the good local disks but
4938           not network enabled
4939       - wait for sync across all devices
4940       - remove all disks from the old secondary
4941
4942     Failures are not very well handled.
4943
4944     """
4945     steps_total = 6
4946     warning, info = (self.proc.LogWarning, self.proc.LogInfo)
4947     instance = self.instance
4948     iv_names = {}
4949     # start of work
4950     cfg = self.cfg
4951     old_node = self.tgt_node
4952     new_node = self.new_node
4953     pri_node = instance.primary_node
4954     nodes_ip = {
4955       old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
4956       new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
4957       pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
4958       }
4959
4960     # Step: check device activation
4961     self.proc.LogStep(1, steps_total, "check device existence")
4962     info("checking volume groups")
4963     my_vg = cfg.GetVGName()
4964     results = self.rpc.call_vg_list([pri_node, new_node])
4965     for node in pri_node, new_node:
4966       res = results[node]
4967       if res.failed or not res.data or my_vg not in res.data:
4968         raise errors.OpExecError("Volume group '%s' not found on %s" %
4969                                  (my_vg, node))
4970     for idx, dev in enumerate(instance.disks):
4971       if idx not in self.op.disks:
4972         continue
4973       info("checking disk/%d on %s" % (idx, pri_node))
4974       cfg.SetDiskID(dev, pri_node)
4975       result = self.rpc.call_blockdev_find(pri_node, dev)
4976       result.Raise()
4977       if not result.data:
4978         raise errors.OpExecError("Can't find disk/%d on node %s" %
4979                                  (idx, pri_node))
4980
4981     # Step: check other node consistency
4982     self.proc.LogStep(2, steps_total, "check peer consistency")
4983     for idx, dev in enumerate(instance.disks):
4984       if idx not in self.op.disks:
4985         continue
4986       info("checking disk/%d consistency on %s" % (idx, pri_node))
4987       if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
4988         raise errors.OpExecError("Primary node (%s) has degraded storage,"
4989                                  " unsafe to replace the secondary" %
4990                                  pri_node)
4991
4992     # Step: create new storage
4993     self.proc.LogStep(3, steps_total, "allocate new storage")
4994     for idx, dev in enumerate(instance.disks):
4995       info("adding new local storage on %s for disk/%d" %
4996            (new_node, idx))
4997       # since we *always* want to create this LV, we use the
4998       # _Create...OnPrimary (which forces the creation), even if we
4999       # are talking about the secondary node
5000       for new_lv in dev.children:
5001         if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
5002                                         _GetInstanceInfoText(instance)):
5003           raise errors.OpExecError("Failed to create new LV named '%s' on"
5004                                    " node '%s'" %
5005                                    (new_lv.logical_id[1], new_node))
5006
5007     # Step 4: dbrd minors and drbd setups changes
5008     # after this, we must manually remove the drbd minors on both the
5009     # error and the success paths
5010     minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
5011                                    instance.name)
5012     logging.debug("Allocated minors %s" % (minors,))
5013     self.proc.LogStep(4, steps_total, "changing drbd configuration")
5014     for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5015       size = dev.size
5016       info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5017       # create new devices on new_node; note that we create two IDs:
5018       # one without port, so the drbd will be activated without
5019       # networking information on the new node at this stage, and one
5020       # with network, for the latter activation in step 4
5021       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5022       if pri_node == o_node1:
5023         p_minor = o_minor1
5024       else:
5025         p_minor = o_minor2
5026
5027       new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5028       new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
5029
5030       iv_names[idx] = (dev, dev.children, new_net_id)
5031       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5032                     new_net_id)
5033       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5034                               logical_id=new_alone_id,
5035                               children=dev.children)
5036       if not _CreateBlockDevOnSecondary(self, new_node, instance,
5037                                         new_drbd, False,
5038                                         _GetInstanceInfoText(instance)):
5039         self.cfg.ReleaseDRBDMinors(instance.name)
5040         raise errors.OpExecError("Failed to create new DRBD on"
5041                                  " node '%s'" % new_node)
5042
5043     for idx, dev in enumerate(instance.disks):
5044       # we have new devices, shutdown the drbd on the old secondary
5045       info("shutting down drbd for disk/%d on old node" % idx)
5046       cfg.SetDiskID(dev, old_node)
5047       result = self.rpc.call_blockdev_shutdown(old_node, dev)
5048       if result.failed or not result.data:
5049         warning("Failed to shutdown drbd for disk/%d on old node" % idx,
5050                 hint="Please cleanup this device manually as soon as possible")
5051
5052     info("detaching primary drbds from the network (=> standalone)")
5053     result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5054                                                instance.disks)[pri_node]
5055
5056     msg = result.RemoteFailMsg()
5057     if msg:
5058       # detaches didn't succeed (unlikely)
5059       self.cfg.ReleaseDRBDMinors(instance.name)
5060       raise errors.OpExecError("Can't detach the disks from the network on"
5061                                " old node: %s" % (msg,))
5062
5063     # if we managed to detach at least one, we update all the disks of
5064     # the instance to point to the new secondary
5065     info("updating instance configuration")
5066     for dev, _, new_logical_id in iv_names.itervalues():
5067       dev.logical_id = new_logical_id
5068       cfg.SetDiskID(dev, pri_node)
5069     cfg.Update(instance)
5070     # we can remove now the temp minors as now the new values are
5071     # written to the config file (and therefore stable)
5072     self.cfg.ReleaseDRBDMinors(instance.name)
5073
5074     # and now perform the drbd attach
5075     info("attaching primary drbds to new secondary (standalone => connected)")
5076     result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5077                                            instance.disks, instance.name,
5078                                            False)
5079     for to_node, to_result in result.items():
5080       msg = to_result.RemoteFailMsg()
5081       if msg:
5082         warning("can't attach drbd disks on node %s: %s", to_node, msg,
5083                 hint="please do a gnt-instance info to see the"
5084                 " status of disks")
5085
5086     # this can fail as the old devices are degraded and _WaitForSync
5087     # does a combined result over all disks, so we don't check its
5088     # return value
5089     self.proc.LogStep(5, steps_total, "sync devices")
5090     _WaitForSync(self, instance, unlock=True)
5091
5092     # so check manually all the devices
5093     for idx, (dev, old_lvs, _) in iv_names.iteritems():
5094       cfg.SetDiskID(dev, pri_node)
5095       result = self.rpc.call_blockdev_find(pri_node, dev)
5096       result.Raise()
5097       if result.data[5]:
5098         raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5099
5100     self.proc.LogStep(6, steps_total, "removing old storage")
5101     for idx, (dev, old_lvs, _) in iv_names.iteritems():
5102       info("remove logical volumes for disk/%d" % idx)
5103       for lv in old_lvs:
5104         cfg.SetDiskID(lv, old_node)
5105         result = self.rpc.call_blockdev_remove(old_node, lv)
5106         if result.failed or not result.data:
5107           warning("Can't remove LV on old secondary",
5108                   hint="Cleanup stale volumes by hand")
5109
5110   def Exec(self, feedback_fn):
5111     """Execute disk replacement.
5112
5113     This dispatches the disk replacement to the appropriate handler.
5114
5115     """
5116     instance = self.instance
5117
5118     # Activate the instance disks if we're replacing them on a down instance
5119     if instance.status == "down":
5120       _StartInstanceDisks(self, instance, True)
5121
5122     if self.op.mode == constants.REPLACE_DISK_CHG:
5123       fn = self._ExecD8Secondary
5124     else:
5125       fn = self._ExecD8DiskOnly
5126
5127     ret = fn(feedback_fn)
5128
5129     # Deactivate the instance disks if we're replacing them on a down instance
5130     if instance.status == "down":
5131       _SafeShutdownInstanceDisks(self, instance)
5132
5133     return ret
5134
5135
class LUGrowDisk(LogicalUnit):
  """Logical unit enlarging a single disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks.

    Instance handling is delegated to _ExpandAndLockInstance; the node
    level starts with an empty list and is flagged for recalculation.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Call _LockInstancesNodes for the node level, ignore other levels.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the DISK and AMOUNT opcode values on top of
    the generic instance environment. The node list consists of the
    master and the primary node of the instance and is returned twice.

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This loads the instance from the configuration, runs
    _CheckNodeOnline on all its nodes, verifies that the disk template
    is plain or drbd8, looks up the requested disk and makes sure that
    every node reports at least the requested amount as free volume
    group space.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    # primary first, then the secondaries
    for node in [instance.primary_node] + list(instance.secondary_nodes):
      _CheckNodeOnline(self, node)

    self.instance = instance

    growable = (constants.DT_PLAIN, constants.DT_DRBD8)
    if instance.disk_template not in growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodenames = [instance.primary_node]
    nodenames.extend(instance.secondary_nodes)
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      answer = nodeinfo[node]
      if answer.failed or not answer.data:
        raise errors.OpPrereqError("Cannot get current information from"
                                   " node '%s'" % node)
      free_space = answer.data.get('vg_free', None)
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on node"
                                   " %s" % node)
      if self.op.amount > free_space:
        raise errors.OpPrereqError("Not enough disk space on target node"
                                   " %s: %d MiB available, %d MiB"
                                   " required" %
                                   (node, free_space, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to the secondary nodes first and to the
    primary node last; afterwards the new size is recorded in the
    configuration and, if requested, the sync is waited for.

    """
    instance = self.instance
    disk = self.disk
    amount = self.op.amount
    for node in (instance.secondary_nodes + (instance.primary_node,)):
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, amount)
      result.Raise()
      reply = result.data
      # a usable answer is a non-empty pair (status, message)
      well_formed = (reply and isinstance(reply, (list, tuple)) and
                     len(reply) == 2)
      if not well_formed:
        raise errors.OpExecError("Grow request failed to node %s" % node)
      if not reply[0]:
        raise errors.OpExecError("Grow request failed to node %s: %s" %
                                 (node, reply[1]))
    disk.RecordGrow(amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync and not _WaitForSync(self, instance):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
5233
5234
class LUQueryInstanceData(NoHooksLU):
  """Logical unit returning configuration and, unless static, live data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    Every locking level gets the value 1 in share_locks. An empty
    instance list selects all instances (locking.ALL_SET); the final
    name list is then computed in CheckPrereq.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if not requested:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        full_name = self.cfg.ExpandInstanceName(short_name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name)
        expanded.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Call _LockInstancesNodes for the node level, ignore other levels.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    When no names were given, the acquired instance locks are used as
    the name list; the instance objects are then loaded from the
    configuration.

    """
    names = self.wanted_names
    if names is None:
      names = self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in names]

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Returns a dictionary describing the device, recursively including
    its children. In static mode no node is contacted and both status
    entries are None. For drbd devices the secondary node is taken from
    the logical_id instead of the snode argument.

    """
    live = not self.op.static
    pnode = instance.primary_node

    dev_pstatus = None
    if live:
      self.cfg.SetDiskID(dev, pnode)
      reply = self.rpc.call_blockdev_find(pnode, dev)
      reply.Raise()
      dev_pstatus = reply.data

    if dev.dev_type in constants.LDS_DRBD:
      # the peer is whichever of the two nodes is not the primary
      if dev.logical_id[0] == pnode:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_sstatus = None
    if snode and live:
      self.cfg.SetDiskID(dev, snode)
      reply = self.rpc.call_blockdev_find(snode, dev)
      reply.Raise()
      dev_sstatus = reply.data

    dev_children = []
    if dev.children:
      for child in dev.children:
        dev_children.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      }

  def Exec(self, feedback_fn):
    """Gather and return data, as a dictionary keyed by instance name"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      # run state stays None when only static data was asked for
      remote_state = None
      if not self.op.static:
        reply = self.rpc.call_instance_info(instance.primary_node,
                                            instance.name,
                                            instance.hypervisor)
        reply.Raise()
        live_data = reply.data
        if live_data and "state" in live_data:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.status == "down":
        config_state = "down"
      else:
        config_state = "up"

      disks = []
      for device in instance.disks:
        disks.append(self._ComputeDiskStatus(instance, None, device))

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        }

    return result
5375
5376
5377 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
5379
5380   """
5381   HPATH = "instance-modify"
5382   HTYPE = constants.HTYPE_INSTANCE
5383   _OP_REQP = ["instance_name"]
5384   REQ_BGL = False
5385
5386   def CheckArguments(self):
5387     if not hasattr(self.op, 'nics'):
5388       self.op.nics = []
5389     if not hasattr(self.op, 'disks'):
5390       self.op.disks = []
5391     if not hasattr(self.op, 'beparams'):
5392       self.op.beparams = {}
5393     if not hasattr(self.op, 'hvparams'):
5394       self.op.hvparams = {}
5395     self.op.force = getattr(self.op, "force", False)
5396     if not (self.op.nics or self.op.disks or
5397             self.op.hvparams or self.op.beparams):
5398       raise errors.OpPrereqError("No changes submitted")
5399
5400     utils.CheckBEParams(self.op.beparams)
5401
5402     # Disk validation
5403     disk_addremove = 0
5404     for disk_op, disk_dict in self.op.disks:
5405       if disk_op == constants.DDM_REMOVE:
5406         disk_addremove += 1
5407         continue
5408       elif disk_op == constants.DDM_ADD:
5409         disk_addremove += 1
5410       else:
5411         if not isinstance(disk_op, int):
5412           raise errors.OpPrereqError("Invalid disk index")
5413       if disk_op == constants.DDM_ADD:
5414         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5415         if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
5416           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5417         size = disk_dict.get('size', None)
5418         if size is None:
5419           raise errors.OpPrereqError("Required disk parameter size missing")
5420         try:
5421           size = int(size)
5422         except ValueError, err:
5423           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5424                                      str(err))
5425         disk_dict['size'] = size
5426       else:
5427         # modification of disk
5428         if 'size' in disk_dict:
5429           raise errors.OpPrereqError("Disk size change not possible, use"
5430                                      " grow-disk")
5431
5432     if disk_addremove > 1:
5433       raise errors.OpPrereqError("Only one disk add or remove operation"
5434                                  " supported at a time")
5435
5436     # NIC validation
5437     nic_addremove = 0
5438     for nic_op, nic_dict in self.op.nics:
5439       if nic_op == constants.DDM_REMOVE:
5440         nic_addremove += 1
5441         continue
5442       elif nic_op == constants.DDM_ADD:
5443         nic_addremove += 1
5444       else:
5445         if not isinstance(nic_op, int):
5446           raise errors.OpPrereqError("Invalid nic index")
5447
5448       # nic_dict should be a dict
5449       nic_ip = nic_dict.get('ip', None)
5450       if nic_ip is not None:
5451         if nic_ip.lower() == "none":
5452           nic_dict['ip'] = None
5453         else:
5454           if not utils.IsValidIP(nic_ip):
5455             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5456       # we can only check None bridges and assign the default one
5457       nic_bridge = nic_dict.get('bridge', None)
5458       if nic_bridge is None:
5459         nic_dict['bridge'] = self.cfg.GetDefBridge()
5460       # but we can validate MACs
5461       nic_mac = nic_dict.get('mac', None)
5462       if nic_mac is not None:
5463         if self.cfg.IsMacInUse(nic_mac):
5464           raise errors.OpPrereqError("MAC address %s already in use"
5465                                      " in cluster" % nic_mac)
5466         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5467           if not utils.IsValidMac(nic_mac):
5468             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5469     if nic_addremove > 1:
5470       raise errors.OpPrereqError("Only one NIC add or remove operation"
5471                                  " supported at a time")
5472
5473   def ExpandNames(self):
5474     self._ExpandAndLockInstance()
5475     self.needed_locks[locking.LEVEL_NODE] = []
5476     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5477
5478   def DeclareLocks(self, level):
5479     if level == locking.LEVEL_NODE:
5480       self._LockInstancesNodes()
5481
5482   def BuildHooksEnv(self):
5483     """Build hooks env.
5484
5485     This runs on the master, primary and secondaries.
5486
5487     """
5488     args = dict()
5489     if constants.BE_MEMORY in self.be_new:
5490       args['memory'] = self.be_new[constants.BE_MEMORY]
5491     if constants.BE_VCPUS in self.be_new:
5492       args['vcpus'] = self.be_new[constants.BE_VCPUS]
5493     # FIXME: readd disk/nic changes
5494     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
5495     nl = [self.cfg.GetMasterNode(),
5496           self.instance.primary_node] + list(self.instance.secondary_nodes)
5497     return env, nl, nl
5498
5499   def CheckPrereq(self):
5500     """Check prerequisites.
5501
5502     This only checks the instance list against the existing names.
5503
5504     """
5505     force = self.force = self.op.force
5506
5507     # checking the new params on the primary/secondary nodes
5508
5509     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5510     assert self.instance is not None, \
5511       "Cannot retrieve locked instance %s" % self.op.instance_name
5512     pnode = self.instance.primary_node
5513     nodelist = [pnode]
5514     nodelist.extend(instance.secondary_nodes)
5515
5516     # hvparams processing
5517     if self.op.hvparams:
5518       i_hvdict = copy.deepcopy(instance.hvparams)
5519       for key, val in self.op.hvparams.iteritems():
5520         if val == constants.VALUE_DEFAULT:
5521           try:
5522             del i_hvdict[key]
5523           except KeyError:
5524             pass
5525         elif val == constants.VALUE_NONE:
5526           i_hvdict[key] = None
5527         else:
5528           i_hvdict[key] = val
5529       cluster = self.cfg.GetClusterInfo()
5530       hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
5531                                 i_hvdict)
5532       # local check
5533       hypervisor.GetHypervisor(
5534         instance.hypervisor).CheckParameterSyntax(hv_new)
5535       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
5536       self.hv_new = hv_new # the new actual values
5537       self.hv_inst = i_hvdict # the new dict (without defaults)
5538     else:
5539       self.hv_new = self.hv_inst = {}
5540
5541     # beparams processing
5542     if self.op.beparams:
5543       i_bedict = copy.deepcopy(instance.beparams)
5544       for key, val in self.op.beparams.iteritems():
5545         if val == constants.VALUE_DEFAULT:
5546           try:
5547             del i_bedict[key]
5548           except KeyError:
5549             pass
5550         else:
5551           i_bedict[key] = val
5552       cluster = self.cfg.GetClusterInfo()
5553       be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
5554                                 i_bedict)
5555       self.be_new = be_new # the new actual values
5556       self.be_inst = i_bedict # the new dict (without defaults)
5557     else:
5558       self.be_new = self.be_inst = {}
5559
5560     self.warn = []
5561
5562     if constants.BE_MEMORY in self.op.beparams and not self.force:
5563       mem_check_list = [pnode]
5564       if be_new[constants.BE_AUTO_BALANCE]:
5565         # either we changed auto_balance to yes or it was from before
5566         mem_check_list.extend(instance.secondary_nodes)
5567       instance_info = self.rpc.call_instance_info(pnode, instance.name,
5568                                                   instance.hypervisor)
5569       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
5570                                          instance.hypervisor)
5571       if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
5572         # Assume the primary node is unreachable and go ahead
5573         self.warn.append("Can't get info from primary node %s" % pnode)
5574       else:
5575         if not instance_info.failed and instance_info.data:
5576           current_mem = instance_info.data['memory']
5577         else:
5578           # Assume instance not running
5579           # (there is a slight race condition here, but it's not very probable,
5580           # and we have no other way to check)
5581           current_mem = 0
5582         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
5583                     nodeinfo[pnode].data['memory_free'])
5584         if miss_mem > 0:
5585           raise errors.OpPrereqError("This change will prevent the instance"
5586                                      " from starting, due to %d MB of memory"
5587                                      " missing on its primary node" % miss_mem)
5588
5589       if be_new[constants.BE_AUTO_BALANCE]:
5590         for node, nres in nodeinfo.iteritems():
5591           if node not in instance.secondary_nodes:
5592             continue
5593           if nres.failed or not isinstance(nres.data, dict):
5594             self.warn.append("Can't get info from secondary node %s" % node)
5595           elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
5596             self.warn.append("Not enough memory to failover instance to"
5597                              " secondary node %s" % node)
5598
5599     # NIC processing
5600     for nic_op, nic_dict in self.op.nics:
5601       if nic_op == constants.DDM_REMOVE:
5602         if not instance.nics:
5603           raise errors.OpPrereqError("Instance has no NICs, cannot remove")
5604         continue
5605       if nic_op != constants.DDM_ADD:
5606         # an existing nic
5607         if nic_op < 0 or nic_op >= len(instance.nics):
5608           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
5609                                      " are 0 to %d" %
5610                                      (nic_op, len(instance.nics)))
5611       nic_bridge = nic_dict.get('bridge', None)
5612       if nic_bridge is not None:
5613         if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
5614           msg = ("Bridge '%s' doesn't exist on one of"
5615                  " the instance nodes" % nic_bridge)
5616           if self.force:
5617             self.warn.append(msg)
5618           else:
5619             raise errors.OpPrereqError(msg)
5620
5621     # DISK processing
5622     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
5623       raise errors.OpPrereqError("Disk operations not supported for"
5624                                  " diskless instances")
5625     for disk_op, disk_dict in self.op.disks:
5626       if disk_op == constants.DDM_REMOVE:
5627         if len(instance.disks) == 1:
5628           raise errors.OpPrereqError("Cannot remove the last disk of"
5629                                      " an instance")
5630         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
5631         ins_l = ins_l[pnode]
5632         if ins_l.failed or not isinstance(ins_l.data, list):
5633           raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
5634         if instance.name in ins_l.data:
5635           raise errors.OpPrereqError("Instance is running, can't remove"
5636                                      " disks.")
5637
5638       if (disk_op == constants.DDM_ADD and
5639           len(instance.nics) >= constants.MAX_DISKS):
5640         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
5641                                    " add more" % constants.MAX_DISKS)
5642       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
5643         # an existing disk
5644         if disk_op < 0 or disk_op >= len(instance.disks):
5645           raise errors.OpPrereqError("Invalid disk index %s, valid values"
5646                                      " are 0 to %d" %
5647                                      (disk_op, len(instance.disks)))
5648
5649     return
5650
5651   def Exec(self, feedback_fn):
5652     """Modifies an instance.
5653
5654     All parameters take effect only at the next restart of the instance.
5655
5656     """
5657     # Process here the warnings from CheckPrereq, as we don't have a
5658     # feedback_fn there.
5659     for warn in self.warn:
5660       feedback_fn("WARNING: %s" % warn)
5661
5662     result = []
5663     instance = self.instance
5664     # disk changes
5665     for disk_op, disk_dict in self.op.disks:
5666       if disk_op == constants.DDM_REMOVE:
5667         # remove the last disk
5668         device = instance.disks.pop()
5669         device_idx = len(instance.disks)
5670         for node, disk in device.ComputeNodeTree(instance.primary_node):
5671           self.cfg.SetDiskID(disk, node)
5672           rpc_result = self.rpc.call_blockdev_remove(node, disk)
5673           if rpc_result.failed or not rpc_result.data:
5674             self.proc.LogWarning("Could not remove disk/%d on node %s,"
5675                                  " continuing anyway", device_idx, node)
5676         result.append(("disk/%d" % device_idx, "remove"))
5677       elif disk_op == constants.DDM_ADD:
5678         # add a new disk
5679         if instance.disk_template == constants.DT_FILE:
5680           file_driver, file_path = instance.disks[0].logical_id
5681           file_path = os.path.dirname(file_path)
5682         else:
5683           file_driver = file_path = None
5684         disk_idx_base = len(instance.disks)
5685         new_disk = _GenerateDiskTemplate(self,
5686                                          instance.disk_template,
5687                                          instance, instance.primary_node,
5688                                          instance.secondary_nodes,
5689                                          [disk_dict],
5690                                          file_path,
5691                                          file_driver,
5692                                          disk_idx_base)[0]
5693         new_disk.mode = disk_dict['mode']
5694         instance.disks.append(new_disk)
5695         info = _GetInstanceInfoText(instance)
5696
5697         logging.info("Creating volume %s for instance %s",
5698                      new_disk.iv_name, instance.name)
5699         # Note: this needs to be kept in sync with _CreateDisks
5700         #HARDCODE
5701         for secondary_node in instance.secondary_nodes:
5702           if not _CreateBlockDevOnSecondary(self, secondary_node, instance,
5703                                             new_disk, False, info):
5704             self.LogWarning("Failed to create volume %s (%s) on"
5705                             " secondary node %s!",
5706                             new_disk.iv_name, new_disk, secondary_node)
5707         #HARDCODE
5708         if not _CreateBlockDevOnPrimary(self, instance.primary_node,
5709                                         instance, new_disk, info):
5710           self.LogWarning("Failed to create volume %s on primary!",
5711                           new_disk.iv_name)
5712         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
5713                        (new_disk.size, new_disk.mode)))
5714       else:
5715         # change a given disk
5716         instance.disks[disk_op].mode = disk_dict['mode']
5717         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
5718     # NIC changes
5719     for nic_op, nic_dict in self.op.nics:
5720       if nic_op == constants.DDM_REMOVE:
5721         # remove the last nic
5722         del instance.nics[-1]
5723         result.append(("nic.%d" % len(instance.nics), "remove"))
5724       elif nic_op == constants.DDM_ADD:
5725         # add a new nic
5726         if 'mac' not in nic_dict:
5727           mac = constants.VALUE_GENERATE
5728         else:
5729           mac = nic_dict['mac']
5730         if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5731           mac = self.cfg.GenerateMAC()
5732         new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
5733                               bridge=nic_dict.get('bridge', None))
5734         instance.nics.append(new_nic)
5735         result.append(("nic.%d" % (len(instance.nics) - 1),
5736                        "add:mac=%s,ip=%s,bridge=%s" %
5737                        (new_nic.mac, new_nic.ip, new_nic.bridge)))
5738       else:
5739         # change a given nic
5740         for key in 'mac', 'ip', 'bridge':
5741           if key in nic_dict:
5742             setattr(instance.nics[nic_op], key, nic_dict[key])
5743             result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
5744
5745     # hvparams changes
5746     if self.op.hvparams:
5747       instance.hvparams = self.hv_new
5748       for key, val in self.op.hvparams.iteritems():
5749         result.append(("hv/%s" % key, val))
5750
5751     # beparams changes
5752     if self.op.beparams:
5753       instance.beparams = self.be_inst
5754       for key, val in self.op.beparams.iteritems():
5755         result.append(("be/%s" % key, val))
5756
5757     self.cfg.Update(instance)
5758
5759     return result
5760
5761
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the (shared) node locks needed by the query.

    An empty node list in the opcode selects all the nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Remember the nodes for which the locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Collect the export list of every queried node.

    @rtype: dict
    @return: a dictionary mapping each node name to the export list
        returned by that node, or to False if the query of the node
        failed

    """
    answers = self.rpc.call_export_list(self.nodes)
    exports = {}
    for node in answers:
      node_answer = answers[node]
      if node_answer.failed:
        exports[node] = False
        continue
      exports[node] = node_answer.data

    return exports
5802
5803
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @rtype: tuple
    @return: the hook environment and the node list, the latter given
        twice (the same list is returned in both positions)

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that both
    the primary and the target node are online, and that the instance
    has no file-based disks.

    @raise errors.OpPrereqError: if the target node is unknown or the
        instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    # use the name of the node object we found: the opcode value is not
    # expanded and might be a short name unknown to the configuration
    _CheckNodeOnline(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The instance is optionally shut down, its disks are snapshotted
    and the instance restarted; the snapshots are then exported to the
    target node and removed, the export is finalized, and any older
    export of the same instance is removed from the other nodes.

    @raise errors.OpExecError: if the instance cannot be shut down or
        restarted

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot Disk object, or False
    # when the snapshot failed (this keeps the indices aligned)
    snap_disks = []

    try:
      for disk in instance.disks:
        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
        if new_dev_name.failed or not new_dev_name.data:
          self.LogWarning("Could not snapshot block device %s on node %s",
                          disk.logical_id[1], src_node)
          snap_disks.append(False)
        else:
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=(vgname, new_dev_name.data),
                                 physical_id=(vgname, new_dev_name.data),
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      # restart the instance if we stopped it and it is marked as up
      if self.op.shutdown and instance.status == "up":
        result = self.rpc.call_instance_start(src_node, instance, None)
        if result.failed or not result.data:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance")

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        if result.failed or not result.data:
          self.LogWarning("Could not export block device %s from node %s to"
                          " node %s", dev.logical_id[1], src_node,
                          dst_node.name)
        # the snapshot is removed whether or not the export succeeded
        result = self.rpc.call_blockdev_remove(src_node, dev)
        if result.failed or not result.data:
          self.LogWarning("Could not remove snapshot block device %s from node"
                          " %s", dev.logical_id[1], src_node)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    if result.failed or not result.data:
      self.LogWarning("Could not finalize export for instance %s on node %s",
                      instance.name, dst_node.name)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].failed:
          continue
        if instance.name in exportlist[node].data:
          # check the rpc result the same way LURemoveExport does, so
          # that a failed removal is actually reported
          rm_result = self.rpc.call_export_remove(node, instance.name)
          if rm_result.failed or not rm_result.data:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s", instance.name, node)
5947
5948
class LURemoveExport(NoHooksLU):
  """Logical unit removing all the exports of a named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks on all the nodes.

    """
    # The instance itself is not locked: nothing happens to it, and exports
    # can be removed for an instance that no longer exists. All nodes must
    # be locked though, since the export can live on any of them.
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  def Exec(self, feedback_fn):
    """Remove the exports of the instance from every node holding one.

    @param feedback_fn: function used to tell the user that no export
        was found for a name which is not a known instance

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # An unknown instance is looked up under the name as passed in; this
    # only matches if that name already was the FQDN.
    unknown_instance = not export_name
    if unknown_instance:
      export_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      node_exports = exportlist[node]
      if node_exports.failed:
        self.LogWarning("Failed to query node %s, continuing" % node)
        continue
      if export_name not in node_exports.data:
        continue
      found = True
      removal = self.rpc.call_export_remove(node, export_name)
      if removal.failed or not removal.data:
        logging.error("Could not remove export for instance %s"
                      " on node %s", export_name, node)

    if unknown_instance and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
5998
5999
class TagsLU(NoHooksLU):
  """Common base for the tag-handling logical units.

  This class is abstract; the concrete tags LUs derive from it.

  """

  def ExpandNames(self):
    """Expand the object name and lock the tagged node or instance.

    For any other kind of object no lock is requested.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      expand_fn = self.cfg.ExpandNodeName
      lock_level = locking.LEVEL_NODE
      error_fmt = "Invalid node name (%s)"
    elif kind == constants.TAG_INSTANCE:
      expand_fn = self.cfg.ExpandInstanceName
      lock_level = locking.LEVEL_INSTANCE
      error_fmt = "Invalid instance name (%s)"
    else:
      return

    full_name = expand_fn(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError(error_fmt % (self.op.name,))
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Look up the object the tag operation applies to.

    The object is stored in self.target; an unknown kind of object is
    rejected with an OpPrereqError.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
    self.target = target
6037
6038
class LUGetTags(TagsLU):
  """Logical unit returning the tags of one object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
6051
6052
6053 class LUSearchTags(NoHooksLU):
6054   """Searches the tags for a given pattern.
6055
6056   """
6057   _OP_REQP = ["pattern"]
6058   REQ_BGL = False
6059
6060   def ExpandNames(self):
6061     self.needed_locks = {}
6062
6063   def CheckPrereq(self):
6064     """Check prerequisites.
6065
6066     This checks the pattern passed for validity by compiling it.
6067
6068     """
6069     try:
6070       self.re = re.compile(self.op.pattern)
6071     except re.error, err:
6072       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6073                                  (self.op.pattern, err))
6074
6075   def Exec(self, feedback_fn):
6076     """Returns the tag list.
6077
6078     """
6079     cfg = self.cfg
6080     tgts = [("/cluster", cfg.GetClusterInfo())]
6081     ilist = cfg.GetAllInstancesInfo().values()
6082     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6083     nlist = cfg.GetAllNodesInfo().values()
6084     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6085     results = []
6086     for path, target in tgts:
6087       for tag in target.GetTags():
6088         if self.re.search(tag):
6089           results.append((path, tag))
6090     return results
6091
6092
6093 class LUAddTags(TagsLU):
6094   """Sets a tag on a given object.
6095
6096   """
6097   _OP_REQP = ["kind", "name", "tags"]
6098   REQ_BGL = False
6099
6100   def CheckPrereq(self):
6101     """Check prerequisites.
6102
6103     This checks the type and length of the tag name and value.
6104
6105     """
6106     TagsLU.CheckPrereq(self)
6107     for tag in self.op.tags:
6108       objects.TaggableObject.ValidateTag(tag)
6109
6110   def Exec(self, feedback_fn):
6111     """Sets the tag.
6112
6113     """
6114     try:
6115       for tag in self.op.tags:
6116         self.target.AddTag(tag)
6117     except errors.TagError, err:
6118       raise errors.OpExecError("Error while setting tag: %s" % str(err))
6119     try:
6120       self.cfg.Update(self.target)
6121     except errors.ConfigurationError:
6122       raise errors.OpRetryError("There has been a modification to the"
6123                                 " config file and the operation has been"
6124                                 " aborted. Please retry.")
6125
6126
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag must be valid and must currently be set on the target.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()
    if requested <= present:
      return

    # report, in sorted order, the tags which are not set on the target
    missing = sorted(["'%s'" % tag for tag in requested - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)))

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save the configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    try:
      self.cfg.Update(target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")
6164
6165
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given amount of time.

  The sleep is done on the master and/or on a list of nodes.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node names, if any, and lock those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # Using _GetWantedNodes in ExpandNames is fine here, but it is not
    # appropriate in every LU; the LogicalUnit.ExpandNames docstring has
    # the details.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  def Exec(self, feedback_fn):
    """Sleep on the master and/or on the requested nodes.

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if not self.op.on_nodes:
      return

    answers = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
    if not answers:
      raise errors.OpExecError("Complete failure from rpc call")
    for node, answer in answers.items():
      answer.Raise()
      if answer.data:
        continue
      raise errors.OpExecError("Failure during rpc call to node %s,"
                               " result: %s" % (node, answer.data))
6211
6212
6213 class IAllocator(object):
6214   """IAllocator framework.
6215
6216   An IAllocator instance has three sets of attributes:
6217     - cfg that is needed to query the cluster
6218     - input data (all members of the _KEYS class attribute are required)
6219     - four buffer attributes (in|out_data|text), that represent the
6220       input (to the external script) in text and data structure format,
6221       and the output from it, again in two formats
6222     - the result variables from the script (success, info, nodes) for
6223       easy usage
6224
6225   """
6226   _ALLO_KEYS = [
6227     "mem_size", "disks", "disk_template",
6228     "os", "tags", "nics", "vcpus", "hypervisor",
6229     ]
6230   _RELO_KEYS = [
6231     "relocate_from",
6232     ]
6233
6234   def __init__(self, lu, mode, name, **kwargs):
6235     self.lu = lu
6236     # init buffer variables
6237     self.in_text = self.out_text = self.in_data = self.out_data = None
6238     # init all input fields so that pylint is happy
6239     self.mode = mode
6240     self.name = name
6241     self.mem_size = self.disks = self.disk_template = None
6242     self.os = self.tags = self.nics = self.vcpus = None
6243     self.hypervisor = None
6244     self.relocate_from = None
6245     # computed fields
6246     self.required_nodes = None
6247     # init result fields
6248     self.success = self.info = self.nodes = None
6249     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6250       keyset = self._ALLO_KEYS
6251     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6252       keyset = self._RELO_KEYS
6253     else:
6254       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6255                                    " IAllocator" % self.mode)
6256     for key in kwargs:
6257       if key not in keyset:
6258         raise errors.ProgrammerError("Invalid input parameter '%s' to"
6259                                      " IAllocator" % key)
6260       setattr(self, key, kwargs[key])
6261     for key in keyset:
6262       if key not in kwargs:
6263         raise errors.ProgrammerError("Missing input parameter '%s' to"
6264                                      " IAllocator" % key)
6265     self._BuildInputData()
6266
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation: the
    cluster-level values, one record per node (built from the
    configuration and from the live data returned by the nodes) and one
    record per instance. The result is stored in self.in_data.

    @raise errors.OpExecError: if a node does not return its data, or
        returns it with a missing or non-integer attribute

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": 1,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enable_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    # pairs of (instance object, result of cluster_info.FillBE for it)
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    # the hypervisor to query the nodes about: the requested one when
    # allocating, the one of the existing instance when relocating
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname in node_list:
      ninfo = cfg.GetNodeInfo(nname)
      node_data[nname].Raise()
      if not isinstance(node_data[nname].data, dict):
        raise errors.OpExecError("Can't get data for node %s" % nname)
      remote_info = node_data[nname].data
      # every attribute must be present and is converted to int in place
      for attr in ['memory_total', 'memory_free', 'memory_dom0',
                   'vg_size', 'vg_free', 'cpu_total']:
        if attr not in remote_info:
          raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
                                   (nname, attr))
        try:
          remote_info[attr] = int(remote_info[attr])
        except ValueError, err:
          raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
                                   " %s" % (nname, attr, str(err)))
      # compute memory used by primary instances
      i_p_mem = i_p_up_mem = 0
      for iinfo, beinfo in i_list:
        if iinfo.primary_node == nname:
          i_p_mem += beinfo[constants.BE_MEMORY]
          # NOTE(review): node_iinfo[nname] is indexed directly here, while
          # the other rpc results in this method are read through '.data';
          # confirm which form call_all_instances_info actually returns
          if iinfo.name not in node_iinfo[nname]:
            i_used_mem = 0
          else:
            i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
          # the memory which is configured for the instance but not
          # reported as used is subtracted from the node's free memory;
          # the max() ensures the free memory is never increased
          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
          remote_info['memory_free'] -= max(0, i_mem_diff)

          if iinfo.status == "up":
            i_p_up_mem += beinfo[constants.BE_MEMORY]

      # assemble the record of this node
      pnr = {
        "tags": list(ninfo.GetTags()),
        "total_memory": remote_info['memory_total'],
        "reserved_memory": remote_info['memory_dom0'],
        "free_memory": remote_info['memory_free'],
        "i_pri_memory": i_p_mem,
        "i_pri_up_memory": i_p_up_mem,
        "total_disk": remote_info['vg_size'],
        "free_disk": remote_info['vg_free'],
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "total_cpus": remote_info['cpu_total'],
        "offline": ninfo.offline,
        }
      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
                  for n in iinfo.nics]
      pir = {
        "tags": list(iinfo.GetTags()),
        "should_run": iinfo.status == "up",
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        # the mode is always reported as "w", whatever the disk's own mode
        "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
6370
6371   def _AddNewInstance(self):
6372     """Add new instance data to allocator structure.
6373
6374     This in combination with _AllocatorGetClusterData will create the
6375     correct structure needed as input for the allocator.
6376
6377     The checks for the completeness of the opcode must have already been
6378     done.
6379
6380     """
6381     data = self.in_data
6382     if len(self.disks) != 2:
6383       raise errors.OpExecError("Only two-disk configurations supported")
6384
6385     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
6386
6387     if self.disk_template in constants.DTS_NET_MIRROR:
6388       self.required_nodes = 2
6389     else:
6390       self.required_nodes = 1
6391     request = {
6392       "type": "allocate",
6393       "name": self.name,
6394       "disk_template": self.disk_template,
6395       "tags": self.tags,
6396       "os": self.os,
6397       "vcpus": self.vcpus,
6398       "memory": self.mem_size,
6399       "disks": self.disks,
6400       "disk_space_total": disk_space,
6401       "nics": self.nics,
6402       "required_nodes": self.required_nodes,
6403       }
6404     data["request"] = request
6405
6406   def _AddRelocateInstance(self):
6407     """Add relocate instance data to allocator structure.
6408
6409     This in combination with _IAllocatorGetClusterData will create the
6410     correct structure needed as input for the allocator.
6411
6412     The checks for the completeness of the opcode must have already been
6413     done.
6414
6415     """
6416     instance = self.lu.cfg.GetInstanceInfo(self.name)
6417     if instance is None:
6418       raise errors.ProgrammerError("Unknown instance '%s' passed to"
6419                                    " IAllocator" % self.name)
6420
6421     if instance.disk_template not in constants.DTS_NET_MIRROR:
6422       raise errors.OpPrereqError("Can't relocate non-mirrored instances")
6423
6424     if len(instance.secondary_nodes) != 1:
6425       raise errors.OpPrereqError("Instance has not exactly one secondary node")
6426
6427     self.required_nodes = 1
6428     disk_sizes = [{'size': disk.size} for disk in instance.disks]
6429     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
6430
6431     request = {
6432       "type": "relocate",
6433       "name": self.name,
6434       "disk_space_total": disk_space,
6435       "required_nodes": self.required_nodes,
6436       "relocate_from": self.relocate_from,
6437       }
6438     self.in_data["request"] = request
6439
6440   def _BuildInputData(self):
6441     """Build input data structures.
6442
6443     """
6444     self._ComputeClusterData()
6445
6446     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6447       self._AddNewInstance()
6448     else:
6449       self._AddRelocateInstance()
6450
6451     self.in_text = serializer.Dump(self.in_data)
6452
6453   def Run(self, name, validate=True, call_fn=None):
6454     """Run an instance allocator and return the results.
6455
6456     """
6457     if call_fn is None:
6458       call_fn = self.lu.rpc.call_iallocator_runner
6459     data = self.in_text
6460
6461     result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
6462     result.Raise()
6463
6464     if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
6465       raise errors.OpExecError("Invalid result from master iallocator runner")
6466
6467     rcode, stdout, stderr, fail = result.data
6468
6469     if rcode == constants.IARUN_NOTFOUND:
6470       raise errors.OpExecError("Can't find allocator '%s'" % name)
6471     elif rcode == constants.IARUN_FAILURE:
6472       raise errors.OpExecError("Instance allocator call failed: %s,"
6473                                " output: %s" % (fail, stdout+stderr))
6474     self.out_text = stdout
6475     if validate:
6476       self._ValidateResult()
6477
6478   def _ValidateResult(self):
6479     """Process the allocator results.
6480
6481     This will process and if successful save the result in
6482     self.out_data and the other parameters.
6483
6484     """
6485     try:
6486       rdict = serializer.Load(self.out_text)
6487     except Exception, err:
6488       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
6489
6490     if not isinstance(rdict, dict):
6491       raise errors.OpExecError("Can't parse iallocator results: not a dict")
6492
6493     for key in "success", "info", "nodes":
6494       if key not in rdict:
6495         raise errors.OpExecError("Can't parse iallocator results:"
6496                                  " missing key '%s'" % key)
6497       setattr(self, key, rdict[key])
6498
6499     if not isinstance(rdict["nodes"], list):
6500       raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
6501                                " is not a list")
6502     self.out_data = rdict
6503
6504
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the opcode's direction, this LU either returns the input
  text built for an instance allocator (constants.IALLOCATOR_DIR_IN) or
  runs the named allocator and returns its raw, unvalidated output
  (constants.IALLOCATOR_DIR_OUT).

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test. In allocation mode all the instance parameters must be
    present and well-formed, and the instance must not exist yet; a
    missing or None hypervisor is replaced by the one returned by
    self.cfg.GetHypervisorType(). In relocation mode the instance must
    exist; its name is expanded and its secondary nodes are remembered in
    self.relocate_from.

    All problems are reported through errors.OpPrereqError.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      if len(self.op.disks) != 2:
        raise errors.OpPrereqError("Only two-disk configurations supported")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      # the hypervisor is optional and not covered by the hasattr checks
      # above, so an opcode lacking the attribute must be treated the
      # same way as one where it is None
      if getattr(self.op, "hypervisor", None) is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    # an allocator name is only needed when the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    An IAllocator object is built from the opcode parameters. For the
    constants.IALLOCATOR_DIR_IN direction its input text is returned;
    otherwise the allocator named in the opcode is run, without
    validation of its results, and its output text is returned.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result