code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47 from ganeti import uidpool
  48 from ganeti import compat
  49
  50
  51 class LogicalUnit(object):
  52   """Logical Unit base class.
  53
  54   Subclasses must follow these rules:
  55     - implement ExpandNames
  56     - implement CheckPrereq (except when tasklets are used)
  57     - implement Exec (except when tasklets are used)
  58     - implement BuildHooksEnv
  59     - redefine HPATH and HTYPE
  60     - optionally redefine their run requirements:
  61         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  62
  63   Note that all commands require root permissions.
  64
  65   @ivar dry_run_result: the value (if any) that will be returned to the caller
  66       in dry-run mode (signalled by opcode dry_run parameter)
  67
  68   """
  69   HPATH = None
  70   HTYPE = None
  71   _OP_REQP = []
  72   REQ_BGL = True
  73
  74   def __init__(self, processor, op, context, rpc):
  75     """Constructor for LogicalUnit.
  76
  77     This needs to be overridden in derived classes in order to check op
  78     validity.
  79
  80     """
  81     self.proc = processor
  82     self.op = op
  83     self.cfg = context.cfg
  84     self.context = context
  85     self.rpc = rpc
  86     # Dicts used to declare locking needs to mcpu
  87     self.needed_locks = None
  88     self.acquired_locks = {}
  89     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  90     self.add_locks = {}
  91     self.remove_locks = {}
  92     # Used to force good behavior when calling helper functions
  93     self.recalculate_locks = {}
  94     self.__ssh = None
  95     # logging
  96     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  97     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  98     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  99     # support for dry-run
 100     self.dry_run_result = None
 101     # support for generic debug attribute
 102     if (not hasattr(self.op, "debug_level") or
 103         not isinstance(self.op.debug_level, int)):
 104       self.op.debug_level = 0
 105
 106     # Tasklets
 107     self.tasklets = None
 108
 109     for attr_name in self._OP_REQP:
 110       attr_val = getattr(op, attr_name, None)
 111       if attr_val is None:
 112         raise errors.OpPrereqError("Required parameter '%s' missing" %
 113                                    attr_name, errors.ECODE_INVAL)
 114
 115     self.CheckArguments()
 116
 117   def __GetSSH(self):
 118     """Returns the SshRunner object
 119
 120     """
 121     if not self.__ssh:
 122       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 123     return self.__ssh
 124
 125   ssh = property(fget=__GetSSH)
 126
 127   def CheckArguments(self):
 128     """Check syntactic validity for the opcode arguments.
 129
 130     This method is for doing a simple syntactic check and ensure
 131     validity of opcode parameters, without any cluster-related
 132     checks. While the same can be accomplished in ExpandNames and/or
 133     CheckPrereq, doing these separate is better because:
 134
 135       - ExpandNames is left as as purely a lock-related function
 136       - CheckPrereq is run after we have acquired locks (and possible
 137         waited for them)
 138
 139     The function is allowed to change the self.op attribute so that
 140     later methods can no longer worry about missing parameters.
 141
 142     """
 143     pass
 144
 145   def ExpandNames(self):
 146     """Expand names for this LU.
 147
 148     This method is called before starting to execute the opcode, and it should
 149     update all the parameters of the opcode to their canonical form (e.g. a
 150     short node name must be fully expanded after this method has successfully
 151     completed). This way locking, hooks, logging, ecc. can work correctly.
 152
 153     LUs which implement this method must also populate the self.needed_locks
 154     member, as a dict with lock levels as keys, and a list of needed lock names
 155     as values. Rules:
 156
 157       - use an empty dict if you don't need any lock
 158       - if you don't need any lock at a particular level omit that level
 159       - don't put anything for the BGL level
 160       - if you want all locks at a level use locking.ALL_SET as a value
 161
 162     If you need to share locks (rather than acquire them exclusively) at one
 163     level you can modify self.share_locks, setting a true value (usually 1) for
 164     that level. By default locks are not shared.
 165
 166     This function can also define a list of tasklets, which then will be
 167     executed in order instead of the usual LU-level CheckPrereq and Exec
 168     functions, if those are not defined by the LU.
 169
 170     Examples::
 171
 172       # Acquire all nodes and one instance
 173       self.needed_locks = {
 174         locking.LEVEL_NODE: locking.ALL_SET,
 175         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 176       }
 177       # Acquire just two nodes
 178       self.needed_locks = {
 179         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 180       }
 181       # Acquire no locks
 182       self.needed_locks = {} # No, you can't leave it to the default value None
 183
 184     """
 185     # The implementation of this method is mandatory only if the new LU is
 186     # concurrent, so that old LUs don't need to be changed all at the same
 187     # time.
 188     if self.REQ_BGL:
 189       self.needed_locks = {} # Exclusive LUs don't need locks.
 190     else:
 191       raise NotImplementedError
 192
 193   def DeclareLocks(self, level):
 194     """Declare LU locking needs for a level
 195
 196     While most LUs can just declare their locking needs at ExpandNames time,
 197     sometimes there's the need to calculate some locks after having acquired
 198     the ones before. This function is called just before acquiring locks at a
 199     particular level, but after acquiring the ones at lower levels, and permits
 200     such calculations. It can be used to modify self.needed_locks, and by
 201     default it does nothing.
 202
 203     This function is only called if you have something already set in
 204     self.needed_locks for the level.
 205
 206     @param level: Locking level which is going to be locked
 207     @type level: member of ganeti.locking.LEVELS
 208
 209     """
 210
 211   def CheckPrereq(self):
 212     """Check prerequisites for this LU.
 213
 214     This method should check that the prerequisites for the execution
 215     of this LU are fulfilled. It can do internode communication, but
 216     it should be idempotent - no cluster or system changes are
 217     allowed.
 218
 219     The method should raise errors.OpPrereqError in case something is
 220     not fulfilled. Its return value is ignored.
 221
 222     This method should also update all the parameters of the opcode to
 223     their canonical form if it hasn't been done by ExpandNames before.
 224
 225     """
 226     if self.tasklets is not None:
 227       for (idx, tl) in enumerate(self.tasklets):
 228         logging.debug("Checking prerequisites for tasklet %s/%s",
 229                       idx + 1, len(self.tasklets))
 230         tl.CheckPrereq()
 231     else:
 232       raise NotImplementedError
 233
 234   def Exec(self, feedback_fn):
 235     """Execute the LU.
 236
 237     This method should implement the actual work. It should raise
 238     errors.OpExecError for failures that are somewhat dealt with in
 239     code, or expected.
 240
 241     """
 242     if self.tasklets is not None:
 243       for (idx, tl) in enumerate(self.tasklets):
 244         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 245         tl.Exec(feedback_fn)
 246     else:
 247       raise NotImplementedError
 248
 249   def BuildHooksEnv(self):
 250     """Build hooks environment for this LU.
 251
 252     This method should return a three-node tuple consisting of: a dict
 253     containing the environment that will be used for running the
 254     specific hook for this LU, a list of node names on which the hook
 255     should run before the execution, and a list of node names on which
 256     the hook should run after the execution.
 257
 258     The keys of the dict must not have 'GANETI_' prefixed as this will
 259     be handled in the hooks runner. Also note additional keys will be
 260     added by the hooks runner. If the LU doesn't define any
 261     environment, an empty dict (and not None) should be returned.
 262
 263     No nodes should be returned as an empty list (and not None).
 264
 265     Note that if the HPATH for a LU class is None, this function will
 266     not be called.
 267
 268     """
 269     raise NotImplementedError
 270
 271   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 272     """Notify the LU about the results of its hooks.
 273
 274     This method is called every time a hooks phase is executed, and notifies
 275     the Logical Unit about the hooks' result. The LU can then use it to alter
 276     its result based on the hooks.  By default the method does nothing and the
 277     previous result is passed back unchanged but any LU can define it if it
 278     wants to use the local cluster hook-scripts somehow.
 279
 280     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 281         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 282     @param hook_results: the results of the multi-node hooks rpc call
 283     @param feedback_fn: function used send feedback back to the caller
 284     @param lu_result: the previous Exec result this LU had, or None
 285         in the PRE phase
 286     @return: the new Exec result, based on the previous result
 287         and hook results
 288
 289     """
 290     # API must be kept, thus we ignore the unused argument and could
 291     # be a function warnings
 292     # pylint: disable-msg=W0613,R0201
 293     return lu_result
 294
 295   def _ExpandAndLockInstance(self):
 296     """Helper function to expand and lock an instance.
 297
 298     Many LUs that work on an instance take its name in self.op.instance_name
 299     and need to expand it and then declare the expanded name for locking. This
 300     function does it, and then updates self.op.instance_name to the expanded
 301     name. It also initializes needed_locks as a dict, if this hasn't been done
 302     before.
 303
 304     """
 305     if self.needed_locks is None:
 306       self.needed_locks = {}
 307     else:
 308       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 309         "_ExpandAndLockInstance called with instance-level locks set"
 310     self.op.instance_name = _ExpandInstanceName(self.cfg,
 311                                                 self.op.instance_name)
 312     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 313
 314   def _LockInstancesNodes(self, primary_only=False):
 315     """Helper function to declare instances' nodes for locking.
 316
 317     This function should be called after locking one or more instances to lock
 318     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 319     with all primary or secondary nodes for instances already locked and
 320     present in self.needed_locks[locking.LEVEL_INSTANCE].
 321
 322     It should be called from DeclareLocks, and for safety only works if
 323     self.recalculate_locks[locking.LEVEL_NODE] is set.
 324
 325     In the future it may grow parameters to just lock some instance's nodes, or
 326     to just lock primaries or secondary nodes, if needed.
 327
 328     If should be called in DeclareLocks in a way similar to::
 329
 330       if level == locking.LEVEL_NODE:
 331         self._LockInstancesNodes()
 332
 333     @type primary_only: boolean
 334     @param primary_only: only lock primary nodes of locked instances
 335
 336     """
 337     assert locking.LEVEL_NODE in self.recalculate_locks, \
 338       "_LockInstancesNodes helper function called with no nodes to recalculate"
 339
 340     # TODO: check if we're really been called with the instance locks held
 341
 342     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 343     # future we might want to have different behaviors depending on the value
 344     # of self.recalculate_locks[locking.LEVEL_NODE]
 345     wanted_nodes = []
 346     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 347       instance = self.context.cfg.GetInstanceInfo(instance_name)
 348       wanted_nodes.append(instance.primary_node)
 349       if not primary_only:
 350         wanted_nodes.extend(instance.secondary_nodes)
 351
 352     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 353       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 354     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 355       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 356
 357     del self.recalculate_locks[locking.LEVEL_NODE]
 358
 359
 360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 361   """Simple LU which runs no hooks.
 362
 363   This LU is intended as a parent for other LogicalUnits which will
 364   run no hooks, in order to reduce duplicate code.
 365
 366   """
 367   HPATH = None
 368   HTYPE = None
 369
 370   def BuildHooksEnv(self):
 371     """Empty BuildHooksEnv for NoHooksLu.
 372
 373     This just raises an error.
 374
 375     """
 376     assert False, "BuildHooksEnv called for NoHooksLUs"
 377
 378
 379 class Tasklet:
 380   """Tasklet base class.
 381
 382   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 383   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 384   tasklets know nothing about locks.
 385
 386   Subclasses must follow these rules:
 387     - Implement CheckPrereq
 388     - Implement Exec
 389
 390   """
 391   def __init__(self, lu):
 392     self.lu = lu
 393
 394     # Shortcuts
 395     self.cfg = lu.cfg
 396     self.rpc = lu.rpc
 397
 398   def CheckPrereq(self):
 399     """Check prerequisites for this tasklets.
 400
 401     This method should check whether the prerequisites for the execution of
 402     this tasklet are fulfilled. It can do internode communication, but it
 403     should be idempotent - no cluster or system changes are allowed.
 404
 405     The method should raise errors.OpPrereqError in case something is not
 406     fulfilled. Its return value is ignored.
 407
 408     This method should also update all parameters to their canonical form if it
 409     hasn't been done before.
 410
 411     """
 412     raise NotImplementedError
 413
 414   def Exec(self, feedback_fn):
 415     """Execute the tasklet.
 416
 417     This method should implement the actual work. It should raise
 418     errors.OpExecError for failures that are somewhat dealt with in code, or
 419     expected.
 420
 421     """
 422     raise NotImplementedError
 423
 424
 425 def _GetWantedNodes(lu, nodes):
 426   """Returns list of checked and expanded node names.
 427
 428   @type lu: L{LogicalUnit}
 429   @param lu: the logical unit on whose behalf we execute
 430   @type nodes: list
 431   @param nodes: list of node names or None for all nodes
 432   @rtype: list
 433   @return: the list of nodes, sorted
 434   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 435
 436   """
 437   if not isinstance(nodes, list):
 438     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 439                                errors.ECODE_INVAL)
 440
 441   if not nodes:
 442     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 443       " non-empty list of nodes whose name is to be expanded.")
 444
 445   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 446   return utils.NiceSort(wanted)
 447
 448
 449 def _GetWantedInstances(lu, instances):
 450   """Returns list of checked and expanded instance names.
 451
 452   @type lu: L{LogicalUnit}
 453   @param lu: the logical unit on whose behalf we execute
 454   @type instances: list
 455   @param instances: list of instance names or None for all instances
 456   @rtype: list
 457   @return: the list of instances, sorted
 458   @raise errors.OpPrereqError: if the instances parameter is wrong type
 459   @raise errors.OpPrereqError: if any of the passed instances is not found
 460
 461   """
 462   if not isinstance(instances, list):
 463     raise errors.OpPrereqError("Invalid argument type 'instances'",
 464                                errors.ECODE_INVAL)
 465
 466   if instances:
 467     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 468   else:
 469     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 470   return wanted
 471
 472
 473 def _CheckOutputFields(static, dynamic, selected):
 474   """Checks whether all selected fields are valid.
 475
 476   @type static: L{utils.FieldSet}
 477   @param static: static fields set
 478   @type dynamic: L{utils.FieldSet}
 479   @param dynamic: dynamic fields set
 480
 481   """
 482   f = utils.FieldSet()
 483   f.Extend(static)
 484   f.Extend(dynamic)
 485
 486   delta = f.NonMatching(selected)
 487   if delta:
 488     raise errors.OpPrereqError("Unknown output fields selected: %s"
 489                                % ",".join(delta), errors.ECODE_INVAL)
 490
 491
 492 def _CheckBooleanOpField(op, name):
 493   """Validates boolean opcode parameters.
 494
 495   This will ensure that an opcode parameter is either a boolean value,
 496   or None (but that it always exists).
 497
 498   """
 499   val = getattr(op, name, None)
 500   if not (val is None or isinstance(val, bool)):
 501     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 502                                (name, str(val)), errors.ECODE_INVAL)
 503   setattr(op, name, val)
 504
 505
 506 def _CheckGlobalHvParams(params):
 507   """Validates that given hypervisor params are not global ones.
 508
 509   This will ensure that instances don't get customised versions of
 510   global params.
 511
 512   """
 513   used_globals = constants.HVC_GLOBALS.intersection(params)
 514   if used_globals:
 515     msg = ("The following hypervisor parameters are global and cannot"
 516            " be customized at instance level, please modify them at"
 517            " cluster level: %s" % utils.CommaJoin(used_globals))
 518     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 519
 520
 521 def _CheckNodeOnline(lu, node):
 522   """Ensure that a given node is online.
 523
 524   @param lu: the LU on behalf of which we make the check
 525   @param node: the node to check
 526   @raise errors.OpPrereqError: if the node is offline
 527
 528   """
 529   if lu.cfg.GetNodeInfo(node).offline:
 530     raise errors.OpPrereqError("Can't use offline node %s" % node,
 531                                errors.ECODE_INVAL)
 532
 533
 534 def _CheckNodeNotDrained(lu, node):
 535   """Ensure that a given node is not drained.
 536
 537   @param lu: the LU on behalf of which we make the check
 538   @param node: the node to check
 539   @raise errors.OpPrereqError: if the node is drained
 540
 541   """
 542   if lu.cfg.GetNodeInfo(node).drained:
 543     raise errors.OpPrereqError("Can't use drained node %s" % node,
 544                                errors.ECODE_INVAL)
 545
 546
 547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 548   """Ensure that a node supports a given OS.
 549
 550   @param lu: the LU on behalf of which we make the check
 551   @param node: the node to check
 552   @param os_name: the OS to query about
 553   @param force_variant: whether to ignore variant errors
 554   @raise errors.OpPrereqError: if the node is not supporting the OS
 555
 556   """
 557   result = lu.rpc.call_os_get(node, os_name)
 558   result.Raise("OS '%s' not in supported OS list for node %s" %
 559                (os_name, node),
 560                prereq=True, ecode=errors.ECODE_INVAL)
 561   if not force_variant:
 562     _CheckOSVariant(result.payload, os_name)
 563
 564
 565 def _RequireFileStorage():
 566   """Checks that file storage is enabled.
 567
 568   @raise errors.OpPrereqError: when file storage is disabled
 569
 570   """
 571   if not constants.ENABLE_FILE_STORAGE:
 572     raise errors.OpPrereqError("File storage disabled at configure time",
 573                                errors.ECODE_INVAL)
 574
 575
 576 def _CheckDiskTemplate(template):
 577   """Ensure a given disk template is valid.
 578
 579   """
 580   if template not in constants.DISK_TEMPLATES:
 581     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 582            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 583     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 584   if template == constants.DT_FILE:
 585     _RequireFileStorage()
 586
 587
 588 def _CheckStorageType(storage_type):
 589   """Ensure a given storage type is valid.
 590
 591   """
 592   if storage_type not in constants.VALID_STORAGE_TYPES:
 593     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 594                                errors.ECODE_INVAL)
 595   if storage_type == constants.ST_FILE:
 596     _RequireFileStorage()
 597
 598
 599
 600 def _CheckInstanceDown(lu, instance, reason):
 601   """Ensure that an instance is not running."""
 602   if instance.admin_up:
 603     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 604                                (instance.name, reason), errors.ECODE_STATE)
 605
 606   pnode = instance.primary_node
 607   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 608   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 609               prereq=True, ecode=errors.ECODE_ENVIRON)
 610
 611   if instance.name in ins_l.payload:
 612     raise errors.OpPrereqError("Instance %s is running, %s" %
 613                                (instance.name, reason), errors.ECODE_STATE)
 614
 615
 616 def _ExpandItemName(fn, name, kind):
 617   """Expand an item name.
 618
 619   @param fn: the function to use for expansion
 620   @param name: requested item name
 621   @param kind: text description ('Node' or 'Instance')
 622   @return: the resolved (full) name
 623   @raise errors.OpPrereqError: if the item is not found
 624
 625   """
 626   full_name = fn(name)
 627   if full_name is None:
 628     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 629                                errors.ECODE_NOENT)
 630   return full_name
 631
 632
 633 def _ExpandNodeName(cfg, name):
 634   """Wrapper over L{_ExpandItemName} for nodes."""
 635   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 636
 637
 638 def _ExpandInstanceName(cfg, name):
 639   """Wrapper over L{_ExpandItemName} for instance."""
 640   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 641
 642
 643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 644                           memory, vcpus, nics, disk_template, disks,
 645                           bep, hvp, hypervisor_name):
 646   """Builds instance related env variables for hooks
 647
 648   This builds the hook environment from individual variables.
 649
 650   @type name: string
 651   @param name: the name of the instance
 652   @type primary_node: string
 653   @param primary_node: the name of the instance's primary node
 654   @type secondary_nodes: list
 655   @param secondary_nodes: list of secondary nodes as strings
 656   @type os_type: string
 657   @param os_type: the name of the instance's OS
 658   @type status: boolean
 659   @param status: the should_run status of the instance
 660   @type memory: string
 661   @param memory: the memory size of the instance
 662   @type vcpus: string
 663   @param vcpus: the count of VCPUs the instance has
 664   @type nics: list
 665   @param nics: list of tuples (ip, mac, mode, link) representing
 666       the NICs the instance has
 667   @type disk_template: string
 668   @param disk_template: the disk template of the instance
 669   @type disks: list
 670   @param disks: the list of (size, mode) pairs
 671   @type bep: dict
 672   @param bep: the backend parameters for the instance
 673   @type hvp: dict
 674   @param hvp: the hypervisor parameters for the instance
 675   @type hypervisor_name: string
 676   @param hypervisor_name: the hypervisor for the instance
 677   @rtype: dict
 678   @return: the hook environment for this instance
 679
 680   """
 681   if status:
 682     str_status = "up"
 683   else:
 684     str_status = "down"
 685   env = {
 686     "OP_TARGET": name,
 687     "INSTANCE_NAME": name,
 688     "INSTANCE_PRIMARY": primary_node,
 689     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 690     "INSTANCE_OS_TYPE": os_type,
 691     "INSTANCE_STATUS": str_status,
 692     "INSTANCE_MEMORY": memory,
 693     "INSTANCE_VCPUS": vcpus,
 694     "INSTANCE_DISK_TEMPLATE": disk_template,
 695     "INSTANCE_HYPERVISOR": hypervisor_name,
 696   }
 697
 698   if nics:
 699     nic_count = len(nics)
 700     for idx, (ip, mac, mode, link) in enumerate(nics):
 701       if ip is None:
 702         ip = ""
 703       env["INSTANCE_NIC%d_IP" % idx] = ip
 704       env["INSTANCE_NIC%d_MAC" % idx] = mac
 705       env["INSTANCE_NIC%d_MODE" % idx] = mode
 706       env["INSTANCE_NIC%d_LINK" % idx] = link
 707       if mode == constants.NIC_MODE_BRIDGED:
 708         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 709   else:
 710     nic_count = 0
 711
 712   env["INSTANCE_NIC_COUNT"] = nic_count
 713
 714   if disks:
 715     disk_count = len(disks)
 716     for idx, (size, mode) in enumerate(disks):
 717       env["INSTANCE_DISK%d_SIZE" % idx] = size
 718       env["INSTANCE_DISK%d_MODE" % idx] = mode
 719   else:
 720     disk_count = 0
 721
 722   env["INSTANCE_DISK_COUNT"] = disk_count
 723
 724   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 725     for key, value in source.items():
 726       env["INSTANCE_%s_%s" % (kind, key)] = value
 727
 728   return env
 729
 730
 731 def _NICListToTuple(lu, nics):
 732   """Build a list of nic information tuples.
 733
 734   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 735   value in LUQueryInstanceData.
 736
 737   @type lu:  L{LogicalUnit}
 738   @param lu: the logical unit on whose behalf we execute
 739   @type nics: list of L{objects.NIC}
 740   @param nics: list of nics to convert to hooks tuples
 741
 742   """
 743   hooks_nics = []
 744   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 745   for nic in nics:
 746     ip = nic.ip
 747     mac = nic.mac
 748     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 749     mode = filled_params[constants.NIC_MODE]
 750     link = filled_params[constants.NIC_LINK]
 751     hooks_nics.append((ip, mac, mode, link))
 752   return hooks_nics
 753
 754
 755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 756   """Builds instance related env variables for hooks from an object.
 757
 758   @type lu: L{LogicalUnit}
 759   @param lu: the logical unit on whose behalf we execute
 760   @type instance: L{objects.Instance}
 761   @param instance: the instance for which we should build the
 762       environment
 763   @type override: dict
 764   @param override: dictionary with key/values that will override
 765       our values
 766   @rtype: dict
 767   @return: the hook environment dictionary
 768
 769   """
 770   cluster = lu.cfg.GetClusterInfo()
 771   bep = cluster.FillBE(instance)
 772   hvp = cluster.FillHV(instance)
 773   args = {
 774     'name': instance.name,
 775     'primary_node': instance.primary_node,
 776     'secondary_nodes': instance.secondary_nodes,
 777     'os_type': instance.os,
 778     'status': instance.admin_up,
 779     'memory': bep[constants.BE_MEMORY],
 780     'vcpus': bep[constants.BE_VCPUS],
 781     'nics': _NICListToTuple(lu, instance.nics),
 782     'disk_template': instance.disk_template,
 783     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 784     'bep': bep,
 785     'hvp': hvp,
 786     'hypervisor_name': instance.hypervisor,
 787   }
 788   if override:
 789     args.update(override)
 790   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 791
 792
 793 def _AdjustCandidatePool(lu, exceptions):
 794   """Adjust the candidate pool after node operations.
 795
 796   """
 797   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 798   if mod_list:
 799     lu.LogInfo("Promoted nodes to master candidate role: %s",
 800                utils.CommaJoin(node.name for node in mod_list))
 801     for name in mod_list:
 802       lu.context.ReaddNode(name)
 803   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 804   if mc_now > mc_max:
 805     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 806                (mc_now, mc_max))
 807
 808
 809 def _DecideSelfPromotion(lu, exceptions=None):
 810   """Decide whether I should promote myself as a master candidate.
 811
 812   """
 813   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 814   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 815   # the new node will increase mc_max with one, so:
 816   mc_should = min(mc_should + 1, cp_size)
 817   return mc_now < mc_should
 818
 819
 820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 821                                profile=constants.PP_DEFAULT):
 822   """Check that the brigdes needed by a list of nics exist.
 823
 824   """
 825   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 826   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 827                 for nic in target_nics]
 828   brlist = [params[constants.NIC_LINK] for params in paramslist
 829             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 830   if brlist:
 831     result = lu.rpc.call_bridges_exist(target_node, brlist)
 832     result.Raise("Error checking bridges on destination node '%s'" %
 833                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 834
 835
 836 def _CheckInstanceBridgesExist(lu, instance, node=None):
 837   """Check that the brigdes needed by an instance exist.
 838
 839   """
 840   if node is None:
 841     node = instance.primary_node
 842   _CheckNicsBridgesExist(lu, instance.nics, node)
 843
 844
 845 def _CheckOSVariant(os_obj, name):
 846   """Check whether an OS name conforms to the os variants specification.
 847
 848   @type os_obj: L{objects.OS}
 849   @param os_obj: OS object to check
 850   @type name: string
 851   @param name: OS name passed by the user, to check for validity
 852
 853   """
 854   if not os_obj.supported_variants:
 855     return
 856   try:
 857     variant = name.split("+", 1)[1]
 858   except IndexError:
 859     raise errors.OpPrereqError("OS name must include a variant",
 860                                errors.ECODE_INVAL)
 861
 862   if variant not in os_obj.supported_variants:
 863     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 864
 865
 866 def _GetNodeInstancesInner(cfg, fn):
 867   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 868
 869
 870 def _GetNodeInstances(cfg, node_name):
 871   """Returns a list of all primary and secondary instances on a node.
 872
 873   """
 874
 875   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 876
 877
 878 def _GetNodePrimaryInstances(cfg, node_name):
 879   """Returns primary instances on a node.
 880
 881   """
 882   return _GetNodeInstancesInner(cfg,
 883                                 lambda inst: node_name == inst.primary_node)
 884
 885
 886 def _GetNodeSecondaryInstances(cfg, node_name):
 887   """Returns secondary instances on a node.
 888
 889   """
 890   return _GetNodeInstancesInner(cfg,
 891                                 lambda inst: node_name in inst.secondary_nodes)
 892
 893
 894 def _GetStorageTypeArgs(cfg, storage_type):
 895   """Returns the arguments for a storage type.
 896
 897   """
 898   # Special case for file storage
 899   if storage_type == constants.ST_FILE:
 900     # storage.FileStorage wants a list of storage directories
 901     return [[cfg.GetFileStorageDir()]]
 902
 903   return []
 904
 905
 906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 907   faulty = []
 908
 909   for dev in instance.disks:
 910     cfg.SetDiskID(dev, node_name)
 911
 912   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 913   result.Raise("Failed to get disk status from node %s" % node_name,
 914                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 915
 916   for idx, bdev_status in enumerate(result.payload):
 917     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 918       faulty.append(idx)
 919
 920   return faulty
 921
 922
 923 def _FormatTimestamp(secs):
 924   """Formats a Unix timestamp with the local timezone.
 925
 926   """
 927   return time.strftime("%F %T %Z", time.gmtime(secs))
 928
 929
 930 class LUPostInitCluster(LogicalUnit):
 931   """Logical unit for running hooks after cluster initialization.
 932
 933   """
 934   HPATH = "cluster-init"
 935   HTYPE = constants.HTYPE_CLUSTER
 936   _OP_REQP = []
 937
 938   def BuildHooksEnv(self):
 939     """Build hooks env.
 940
 941     """
 942     env = {"OP_TARGET": self.cfg.GetClusterName()}
 943     mn = self.cfg.GetMasterNode()
 944     return env, [], [mn]
 945
 946   def CheckPrereq(self):
 947     """No prerequisites to check.
 948
 949     """
 950     return True
 951
 952   def Exec(self, feedback_fn):
 953     """Nothing to do.
 954
 955     """
 956     return True
 957
 958
 959 class LUDestroyCluster(LogicalUnit):
 960   """Logical unit for destroying the cluster.
 961
 962   """
 963   HPATH = "cluster-destroy"
 964   HTYPE = constants.HTYPE_CLUSTER
 965   _OP_REQP = []
 966
 967   def BuildHooksEnv(self):
 968     """Build hooks env.
 969
 970     """
 971     env = {"OP_TARGET": self.cfg.GetClusterName()}
 972     return env, [], []
 973
 974   def CheckPrereq(self):
 975     """Check prerequisites.
 976
 977     This checks whether the cluster is empty.
 978
 979     Any errors are signaled by raising errors.OpPrereqError.
 980
 981     """
 982     master = self.cfg.GetMasterNode()
 983
 984     nodelist = self.cfg.GetNodeList()
 985     if len(nodelist) != 1 or nodelist[0] != master:
 986       raise errors.OpPrereqError("There are still %d node(s) in"
 987                                  " this cluster." % (len(nodelist) - 1),
 988                                  errors.ECODE_INVAL)
 989     instancelist = self.cfg.GetInstanceList()
 990     if instancelist:
 991       raise errors.OpPrereqError("There are still %d instance(s) in"
 992                                  " this cluster." % len(instancelist),
 993                                  errors.ECODE_INVAL)
 994
 995   def Exec(self, feedback_fn):
 996     """Destroys the cluster.
 997
 998     """
 999     master = self.cfg.GetMasterNode()
1000     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001
1002     # Run post hooks on master node before it's removed
1003     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004     try:
1005       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006     except:
1007       # pylint: disable-msg=W0702
1008       self.LogWarning("Errors occurred running hooks on %s" % master)
1009
1010     result = self.rpc.call_node_stop_master(master, False)
1011     result.Raise("Could not disable the master role")
1012
1013     if modify_ssh_setup:
1014       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015       utils.CreateBackup(priv_key)
1016       utils.CreateBackup(pub_key)
1017
1018     return master
1019
1020
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Classify the validity of a certificate for LUVerifyCluster.

  @param filename: certificate path, used only in the messages
  @param expired: whether the certificate is already expired
  @param not_before: start of validity as a timestamp, or None if unknown
  @param not_after: end of validity as a timestamp, or None if unknown
  @param now: the timestamp to compare the validity against
  @param warn_days: a warning is returned when at most this many days of
      validity remain
  @param error_days: an error is returned when at most this many days of
      validity remain
  @rtype: tuple
  @return: (error type, message), where the type is one of the
      LUVerifyCluster C{ETYPE_*} values; (None, None) if there is nothing
      to report

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    # append whatever is known about the validity period
    have_start = not_before is not None
    have_end = not_after is not None
    if have_start and have_end:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif have_start:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif have_end:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is not None:
    seconds_per_day = 24 * 3600
    days_left = int((not_after - now) / seconds_per_day)

    msg = ("Certificate %s expires in %d days" % (filename, days_left))

    # the error threshold is checked first, so it wins over the warning
    if days_left <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if days_left <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1058
1059
1060 def _VerifyCertificate(filename):
1061   """Verifies a certificate for LUVerifyCluster.
1062
1063   @type filename: string
1064   @param filename: Path to PEM file
1065
1066   """
1067   try:
1068     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069                                            utils.ReadFile(filename))
1070   except Exception, err: # pylint: disable-msg=W0703
1071     return (LUVerifyCluster.ETYPE_ERROR,
1072             "Failed to load X509 certificate %s: %s" % (filename, err))
1073
1074   # Depending on the pyOpenSSL version, this can just return (None, None)
1075   (not_before, not_after) = utils.GetX509CertValidity(cert)
1076
1077   return _VerifyCertificateInner(filename, cert.has_expired(),
1078                                  not_before, not_after, time.time())
1079
1080
1081 class LUVerifyCluster(LogicalUnit):
1082   """Verifies the cluster status.
1083
1084   """
1085   HPATH = "cluster-verify"
1086   HTYPE = constants.HTYPE_CLUSTER
1087   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088   REQ_BGL = False
1089
1090   TCLUSTER = "cluster"
1091   TNODE = "node"
1092   TINSTANCE = "instance"
1093
1094   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102   ENODEDRBD = (TNODE, "ENODEDRBD")
1103   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105   ENODEHV = (TNODE, "ENODEHV")
1106   ENODELVM = (TNODE, "ENODELVM")
1107   ENODEN1 = (TNODE, "ENODEN1")
1108   ENODENET = (TNODE, "ENODENET")
1109   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111   ENODERPC = (TNODE, "ENODERPC")
1112   ENODESSH = (TNODE, "ENODESSH")
1113   ENODEVERSION = (TNODE, "ENODEVERSION")
1114   ENODESETUP = (TNODE, "ENODESETUP")
1115   ENODETIME = (TNODE, "ENODETIME")
1116
1117   ETYPE_FIELD = "code"
1118   ETYPE_ERROR = "ERROR"
1119   ETYPE_WARNING = "WARNING"
1120
1121   class NodeImage(object):
1122     """A class representing the logical and physical status of a node.
1123
1124     @ivar volumes: a structure as returned from
1125         L{ganeti.backend.GetVolumeList} (runtime)
1126     @ivar instances: a list of running instances (runtime)
1127     @ivar pinst: list of configured primary instances (config)
1128     @ivar sinst: list of configured secondary instances (config)
1129     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1130         of this node (config)
1131     @ivar mfree: free memory, as reported by hypervisor (runtime)
1132     @ivar dfree: free disk, as reported by the node (runtime)
1133     @ivar offline: the offline status (config)
1134     @type rpc_fail: boolean
1135     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1136         not whether the individual keys were correct) (runtime)
1137     @type lvm_fail: boolean
1138     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139     @type hyp_fail: boolean
1140     @ivar hyp_fail: whether the RPC call didn't return the instance list
1141     @type ghost: boolean
1142     @ivar ghost: whether this is a known node or not (config)
1143
1144     """
1145     def __init__(self, offline=False):
1146       self.volumes = {}
1147       self.instances = []
1148       self.pinst = []
1149       self.sinst = []
1150       self.sbp = {}
1151       self.mfree = 0
1152       self.dfree = 0
1153       self.offline = offline
1154       self.rpc_fail = False
1155       self.lvm_fail = False
1156       self.hyp_fail = False
1157       self.ghost = False
1158
1159   def ExpandNames(self):
1160     self.needed_locks = {
1161       locking.LEVEL_NODE: locking.ALL_SET,
1162       locking.LEVEL_INSTANCE: locking.ALL_SET,
1163     }
1164     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165
1166   def _Error(self, ecode, item, msg, *args, **kwargs):
1167     """Format an error message.
1168
1169     Based on the opcode's error_codes parameter, either format a
1170     parseable error code, or a simpler error string.
1171
1172     This must be called only from Exec and functions called from Exec.
1173
1174     """
1175     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176     itype, etxt = ecode
1177     # first complete the msg
1178     if args:
1179       msg = msg % args
1180     # then format the whole message
1181     if self.op.error_codes:
1182       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1183     else:
1184       if item:
1185         item = " " + item
1186       else:
1187         item = ""
1188       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189     # and finally report it via the feedback_fn
1190     self._feedback_fn("  - %s" % msg)
1191
1192   def _ErrorIf(self, cond, *args, **kwargs):
1193     """Log an error message if the passed condition is True.
1194
1195     """
1196     cond = bool(cond) or self.op.debug_simulate_errors
1197     if cond:
1198       self._Error(*args, **kwargs)
1199     # do not mark the operation as failed for WARN cases only
1200     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201       self.bad = self.bad or cond
1202
1203   def _VerifyNode(self, ninfo, nresult):
1204     """Run multiple tests against a node.
1205
1206     Test list:
1207
1208       - compares ganeti version
1209       - checks vg existence and size > 20G
1210       - checks config file checksum
1211       - checks ssh to other nodes
1212
1213     @type ninfo: L{objects.Node}
1214     @param ninfo: the node to check
1215     @param nresult: the results from the node
1216     @rtype: boolean
1217     @return: whether overall this call was successful (and we can expect
1218          reasonable values in the respose)
1219
1220     """
1221     node = ninfo.name
1222     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223
1224     # main result, nresult should be a non-empty dict
1225     test = not nresult or not isinstance(nresult, dict)
1226     _ErrorIf(test, self.ENODERPC, node,
1227                   "unable to verify node: no data returned")
1228     if test:
1229       return False
1230
1231     # compares ganeti version
1232     local_version = constants.PROTOCOL_VERSION
1233     remote_version = nresult.get("version", None)
1234     test = not (remote_version and
1235                 isinstance(remote_version, (list, tuple)) and
1236                 len(remote_version) == 2)
1237     _ErrorIf(test, self.ENODERPC, node,
1238              "connection to node returned invalid data")
1239     if test:
1240       return False
1241
1242     test = local_version != remote_version[0]
1243     _ErrorIf(test, self.ENODEVERSION, node,
1244              "incompatible protocol versions: master %s,"
1245              " node %s", local_version, remote_version[0])
1246     if test:
1247       return False
1248
1249     # node seems compatible, we can actually try to look into its results
1250
1251     # full package version
1252     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253                   self.ENODEVERSION, node,
1254                   "software version mismatch: master %s, node %s",
1255                   constants.RELEASE_VERSION, remote_version[1],
1256                   code=self.ETYPE_WARNING)
1257
1258     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259     if isinstance(hyp_result, dict):
1260       for hv_name, hv_result in hyp_result.iteritems():
1261         test = hv_result is not None
1262         _ErrorIf(test, self.ENODEHV, node,
1263                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264
1265
1266     test = nresult.get(constants.NV_NODESETUP,
1267                            ["Missing NODESETUP results"])
1268     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1269              "; ".join(test))
1270
1271     return True
1272
1273   def _VerifyNodeTime(self, ninfo, nresult,
1274                       nvinfo_starttime, nvinfo_endtime):
1275     """Check the node time.
1276
1277     @type ninfo: L{objects.Node}
1278     @param ninfo: the node to check
1279     @param nresult: the remote results for the node
1280     @param nvinfo_starttime: the start time of the RPC call
1281     @param nvinfo_endtime: the end time of the RPC call
1282
1283     """
1284     node = ninfo.name
1285     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286
1287     ntime = nresult.get(constants.NV_TIME, None)
1288     try:
1289       ntime_merged = utils.MergeTime(ntime)
1290     except (ValueError, TypeError):
1291       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292       return
1293
1294     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1298     else:
1299       ntime_diff = None
1300
1301     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302              "Node time diverges by at least %s from master node time",
1303              ntime_diff)
1304
1305   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306     """Check the node time.
1307
1308     @type ninfo: L{objects.Node}
1309     @param ninfo: the node to check
1310     @param nresult: the remote results for the node
1311     @param vg_name: the configured VG name
1312
1313     """
1314     if vg_name is None:
1315       return
1316
1317     node = ninfo.name
1318     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319
1320     # checks vg existence and size > 20G
1321     vglist = nresult.get(constants.NV_VGLIST, None)
1322     test = not vglist
1323     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324     if not test:
1325       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326                                             constants.MIN_VG_SIZE)
1327       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328
1329     # check pv names
1330     pvlist = nresult.get(constants.NV_PVLIST, None)
1331     test = pvlist is None
1332     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333     if not test:
1334       # check that ':' is not present in PV names, since it's a
1335       # special character for lvcreate (denotes the range of PEs to
1336       # use on the PV)
1337       for _, pvname, owner_vg in pvlist:
1338         test = ":" in pvname
1339         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340                  " '%s' of VG '%s'", pvname, owner_vg)
1341
1342   def _VerifyNodeNetwork(self, ninfo, nresult):
1343     """Check the node time.
1344
1345     @type ninfo: L{objects.Node}
1346     @param ninfo: the node to check
1347     @param nresult: the remote results for the node
1348
1349     """
1350     node = ninfo.name
1351     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352
1353     test = constants.NV_NODELIST not in nresult
1354     _ErrorIf(test, self.ENODESSH, node,
1355              "node hasn't returned node ssh connectivity data")
1356     if not test:
1357       if nresult[constants.NV_NODELIST]:
1358         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359           _ErrorIf(True, self.ENODESSH, node,
1360                    "ssh communication with node '%s': %s", a_node, a_msg)
1361
1362     test = constants.NV_NODENETTEST not in nresult
1363     _ErrorIf(test, self.ENODENET, node,
1364              "node hasn't returned node tcp connectivity data")
1365     if not test:
1366       if nresult[constants.NV_NODENETTEST]:
1367         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368         for anode in nlist:
1369           _ErrorIf(True, self.ENODENET, node,
1370                    "tcp communication with node '%s': %s",
1371                    anode, nresult[constants.NV_NODENETTEST][anode])
1372
1373   def _VerifyInstance(self, instance, instanceconfig, node_image):
1374     """Verify an instance.
1375
1376     This function checks to see if the required block devices are
1377     available on the instance's node.
1378
1379     """
1380     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381     node_current = instanceconfig.primary_node
1382
1383     node_vol_should = {}
1384     instanceconfig.MapLVsByNode(node_vol_should)
1385
1386     for node in node_vol_should:
1387       n_img = node_image[node]
1388       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1389         # ignore missing volumes on offline or broken nodes
1390         continue
1391       for volume in node_vol_should[node]:
1392         test = volume not in n_img.volumes
1393         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1394                  "volume %s missing on node %s", volume, node)
1395
1396     if instanceconfig.admin_up:
1397       pri_img = node_image[node_current]
1398       test = instance not in pri_img.instances and not pri_img.offline
1399       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1400                "instance not running on its primary node %s",
1401                node_current)
1402
1403     for node, n_img in node_image.items():
1404       if (not node == node_current):
1405         test = instance in n_img.instances
1406         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1407                  "instance should not run on node %s", node)
1408
1409   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1410     """Verify if there are any unknown volumes in the cluster.
1411
1412     The .os, .swap and backup volumes are ignored. All other volumes are
1413     reported as unknown.
1414
1415     """
1416     for node, n_img in node_image.items():
1417       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1418         # skip non-healthy nodes
1419         continue
1420       for volume in n_img.volumes:
1421         test = (node not in node_vol_should or
1422                 volume not in node_vol_should[node])
1423         self._ErrorIf(test, self.ENODEORPHANLV, node,
1424                       "volume %s is unknown", volume)
1425
1426   def _VerifyOrphanInstances(self, instancelist, node_image):
1427     """Verify the list of running instances.
1428
1429     This checks what instances are running but unknown to the cluster.
1430
1431     """
1432     for node, n_img in node_image.items():
1433       for o_inst in n_img.instances:
1434         test = o_inst not in instancelist
1435         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1436                       "instance %s on node %s should not exist", o_inst, node)
1437
1438   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1439     """Verify N+1 Memory Resilience.
1440
1441     Check that if one single node dies we can still start all the
1442     instances it was primary for.
1443
1444     """
1445     for node, n_img in node_image.items():
1446       # This code checks that every node which is now listed as
1447       # secondary has enough memory to host all instances it is
1448       # supposed to should a single other node in the cluster fail.
1449       # FIXME: not ready for failover to an arbitrary node
1450       # FIXME: does not support file-backed instances
1451       # WARNING: we currently take into account down instances as well
1452       # as up ones, considering that even if they're down someone
1453       # might want to start them even in the event of a node failure.
1454       for prinode, instances in n_img.sbp.items():
1455         needed_mem = 0
1456         for instance in instances:
1457           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1458           if bep[constants.BE_AUTO_BALANCE]:
1459             needed_mem += bep[constants.BE_MEMORY]
1460         test = n_img.mfree < needed_mem
1461         self._ErrorIf(test, self.ENODEN1, node,
1462                       "not enough memory on to accommodate"
1463                       " failovers should peer node %s fail", prinode)
1464
1465   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1466                        master_files):
1467     """Verifies and computes the node required file checksums.
1468
1469     @type ninfo: L{objects.Node}
1470     @param ninfo: the node to check
1471     @param nresult: the remote results for the node
1472     @param file_list: required list of files
1473     @param local_cksum: dictionary of local files and their checksums
1474     @param master_files: list of files that only masters should have
1475
1476     """
1477     node = ninfo.name
1478     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1479
1480     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1481     test = not isinstance(remote_cksum, dict)
1482     _ErrorIf(test, self.ENODEFILECHECK, node,
1483              "node hasn't returned file checksum data")
1484     if test:
1485       return
1486
1487     for file_name in file_list:
1488       node_is_mc = ninfo.master_candidate
1489       must_have = (file_name not in master_files) or node_is_mc
1490       # missing
1491       test1 = file_name not in remote_cksum
1492       # invalid checksum
1493       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1494       # existing and good
1495       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1496       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1497                "file '%s' missing", file_name)
1498       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1499                "file '%s' has wrong checksum", file_name)
1500       # not candidate and this is not a must-have file
1501       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1502                "file '%s' should not exist on non master"
1503                " candidates (and the file is outdated)", file_name)
1504       # all good, except non-master/non-must have combination
1505       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1506                "file '%s' should not exist"
1507                " on non master candidates", file_name)
1508
1509   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1510     """Verifies and the node DRBD status.
1511
1512     @type ninfo: L{objects.Node}
1513     @param ninfo: the node to check
1514     @param nresult: the remote results for the node
1515     @param instanceinfo: the dict of instances
1516     @param drbd_map: the DRBD map as returned by
1517         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1518
1519     """
1520     node = ninfo.name
1521     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1522
1523     # compute the DRBD minors
1524     node_drbd = {}
1525     for minor, instance in drbd_map[node].items():
1526       test = instance not in instanceinfo
1527       _ErrorIf(test, self.ECLUSTERCFG, None,
1528                "ghost instance '%s' in temporary DRBD map", instance)
1529         # ghost instance should not be running, but otherwise we
1530         # don't give double warnings (both ghost instance and
1531         # unallocated minor in use)
1532       if test:
1533         node_drbd[minor] = (instance, False)
1534       else:
1535         instance = instanceinfo[instance]
1536         node_drbd[minor] = (instance.name, instance.admin_up)
1537
1538     # and now check them
1539     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1540     test = not isinstance(used_minors, (tuple, list))
1541     _ErrorIf(test, self.ENODEDRBD, node,
1542              "cannot parse drbd status file: %s", str(used_minors))
1543     if test:
1544       # we cannot check drbd status
1545       return
1546
1547     for minor, (iname, must_exist) in node_drbd.items():
1548       test = minor not in used_minors and must_exist
1549       _ErrorIf(test, self.ENODEDRBD, node,
1550                "drbd minor %d of instance %s is not active", minor, iname)
1551     for minor in used_minors:
1552       test = minor not in node_drbd
1553       _ErrorIf(test, self.ENODEDRBD, node,
1554                "unallocated drbd minor %d is in use", minor)
1555
1556   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1557     """Verifies and updates the node volume data.
1558
1559     This function will update a L{NodeImage}'s internal structures
1560     with data from the remote call.
1561
1562     @type ninfo: L{objects.Node}
1563     @param ninfo: the node to check
1564     @param nresult: the remote results for the node
1565     @param nimg: the node image object
1566     @param vg_name: the configured VG name
1567
1568     """
1569     node = ninfo.name
1570     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1571
1572     nimg.lvm_fail = True
1573     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1574     if vg_name is None:
1575       pass
1576     elif isinstance(lvdata, basestring):
1577       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1578                utils.SafeEncode(lvdata))
1579     elif not isinstance(lvdata, dict):
1580       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1581     else:
1582       nimg.volumes = lvdata
1583       nimg.lvm_fail = False
1584
1585   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1586     """Verifies and updates the node instance list.
1587
1588     If the listing was successful, then updates this node's instance
1589     list. Otherwise, it marks the RPC call as failed for the instance
1590     list key.
1591
1592     @type ninfo: L{objects.Node}
1593     @param ninfo: the node to check
1594     @param nresult: the remote results for the node
1595     @param nimg: the node image object
1596
1597     """
1598     idata = nresult.get(constants.NV_INSTANCELIST, None)
1599     test = not isinstance(idata, list)
1600     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1601                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1602     if test:
1603       nimg.hyp_fail = True
1604     else:
1605       nimg.instances = idata
1606
1607   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1608     """Verifies and computes a node information map
1609
1610     @type ninfo: L{objects.Node}
1611     @param ninfo: the node to check
1612     @param nresult: the remote results for the node
1613     @param nimg: the node image object
1614     @param vg_name: the configured VG name
1615
1616     """
1617     node = ninfo.name
1618     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619
1620     # try to read free memory (from the hypervisor)
1621     hv_info = nresult.get(constants.NV_HVINFO, None)
1622     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1623     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1624     if not test:
1625       try:
1626         nimg.mfree = int(hv_info["memory_free"])
1627       except (ValueError, TypeError):
1628         _ErrorIf(True, self.ENODERPC, node,
1629                  "node returned invalid nodeinfo, check hypervisor")
1630
1631     # FIXME: devise a free space model for file based instances as well
1632     if vg_name is not None:
1633       test = (constants.NV_VGLIST not in nresult or
1634               vg_name not in nresult[constants.NV_VGLIST])
1635       _ErrorIf(test, self.ENODELVM, node,
1636                "node didn't return data for the volume group '%s'"
1637                " - it is either missing or broken", vg_name)
1638       if not test:
1639         try:
1640           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1641         except (ValueError, TypeError):
1642           _ErrorIf(True, self.ENODERPC, node,
1643                    "node returned invalid LVM info, check LVM status")
1644
1645   def CheckPrereq(self):
1646     """Check prerequisites.
1647
1648     Transform the list of checks we're going to skip into a set and check that
1649     all its members are valid.
1650
1651     """
1652     self.skip_set = frozenset(self.op.skip_checks)
1653     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1654       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1655                                  errors.ECODE_INVAL)
1656
1657   def BuildHooksEnv(self):
1658     """Build hooks env.
1659
1660     Cluster-Verify hooks just ran in the post phase and their failure makes
1661     the output be logged in the verify output and the verification to fail.
1662
1663     """
1664     all_nodes = self.cfg.GetNodeList()
1665     env = {
1666       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1667       }
1668     for node in self.cfg.GetAllNodesInfo().values():
1669       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1670
1671     return env, [], all_nodes
1672
1673   def Exec(self, feedback_fn):
1674     """Verify integrity of cluster, performing various test on nodes.
1675
1676     """
1677     self.bad = False
1678     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1679     verbose = self.op.verbose
1680     self._feedback_fn = feedback_fn
1681     feedback_fn("* Verifying global settings")
1682     for msg in self.cfg.VerifyConfig():
1683       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1684
1685     # Check the cluster certificates
1686     for cert_filename in constants.ALL_CERT_FILES:
1687       (errcode, msg) = _VerifyCertificate(cert_filename)
1688       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1689
1690     vg_name = self.cfg.GetVGName()
1691     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1692     cluster = self.cfg.GetClusterInfo()
1693     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1694     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1695     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1696     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1697                         for iname in instancelist)
1698     i_non_redundant = [] # Non redundant instances
1699     i_non_a_balanced = [] # Non auto-balanced instances
1700     n_offline = 0 # Count of offline nodes
1701     n_drained = 0 # Count of nodes being drained
1702     node_vol_should = {}
1703
1704     # FIXME: verify OS list
1705     # do local checksums
1706     master_files = [constants.CLUSTER_CONF_FILE]
1707
1708     file_names = ssconf.SimpleStore().GetFileList()
1709     file_names.extend(constants.ALL_CERT_FILES)
1710     file_names.extend(master_files)
1711     if cluster.modify_etc_hosts:
1712       file_names.append(constants.ETC_HOSTS)
1713
1714     local_checksums = utils.FingerprintFiles(file_names)
1715
1716     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1717     node_verify_param = {
1718       constants.NV_FILELIST: file_names,
1719       constants.NV_NODELIST: [node.name for node in nodeinfo
1720                               if not node.offline],
1721       constants.NV_HYPERVISOR: hypervisors,
1722       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1723                                   node.secondary_ip) for node in nodeinfo
1724                                  if not node.offline],
1725       constants.NV_INSTANCELIST: hypervisors,
1726       constants.NV_VERSION: None,
1727       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1728       constants.NV_NODESETUP: None,
1729       constants.NV_TIME: None,
1730       }
1731
1732     if vg_name is not None:
1733       node_verify_param[constants.NV_VGLIST] = None
1734       node_verify_param[constants.NV_LVLIST] = vg_name
1735       node_verify_param[constants.NV_PVLIST] = [vg_name]
1736       node_verify_param[constants.NV_DRBDLIST] = None
1737
1738     # Build our expected cluster state
1739     node_image = dict((node.name, self.NodeImage(offline=node.offline))
1740                       for node in nodeinfo)
1741
1742     for instance in instancelist:
1743       inst_config = instanceinfo[instance]
1744
1745       for nname in inst_config.all_nodes:
1746         if nname not in node_image:
1747           # ghost node
1748           gnode = self.NodeImage()
1749           gnode.ghost = True
1750           node_image[nname] = gnode
1751
1752       inst_config.MapLVsByNode(node_vol_should)
1753
1754       pnode = inst_config.primary_node
1755       node_image[pnode].pinst.append(instance)
1756
1757       for snode in inst_config.secondary_nodes:
1758         nimg = node_image[snode]
1759         nimg.sinst.append(instance)
1760         if pnode not in nimg.sbp:
1761           nimg.sbp[pnode] = []
1762         nimg.sbp[pnode].append(instance)
1763
1764     # At this point, we have the in-memory data structures complete,
1765     # except for the runtime information, which we'll gather next
1766
1767     # Due to the way our RPC system works, exact response times cannot be
1768     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1769     # time before and after executing the request, we can at least have a time
1770     # window.
1771     nvinfo_starttime = time.time()
1772     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1773                                            self.cfg.GetClusterName())
1774     nvinfo_endtime = time.time()
1775
1776     master_node = self.cfg.GetMasterNode()
1777     all_drbd_map = self.cfg.ComputeDRBDMap()
1778
1779     feedback_fn("* Verifying node status")
1780     for node_i in nodeinfo:
1781       node = node_i.name
1782       nimg = node_image[node]
1783
1784       if node_i.offline:
1785         if verbose:
1786           feedback_fn("* Skipping offline node %s" % (node,))
1787         n_offline += 1
1788         continue
1789
1790       if node == master_node:
1791         ntype = "master"
1792       elif node_i.master_candidate:
1793         ntype = "master candidate"
1794       elif node_i.drained:
1795         ntype = "drained"
1796         n_drained += 1
1797       else:
1798         ntype = "regular"
1799       if verbose:
1800         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1801
1802       msg = all_nvinfo[node].fail_msg
1803       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1804       if msg:
1805         nimg.rpc_fail = True
1806         continue
1807
1808       nresult = all_nvinfo[node].payload
1809
1810       nimg.call_ok = self._VerifyNode(node_i, nresult)
1811       self._VerifyNodeNetwork(node_i, nresult)
1812       self._VerifyNodeLVM(node_i, nresult, vg_name)
1813       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1814                             master_files)
1815       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1816       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1817
1818       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1819       self._UpdateNodeInstances(node_i, nresult, nimg)
1820       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1821
1822     feedback_fn("* Verifying instance status")
1823     for instance in instancelist:
1824       if verbose:
1825         feedback_fn("* Verifying instance %s" % instance)
1826       inst_config = instanceinfo[instance]
1827       self._VerifyInstance(instance, inst_config, node_image)
1828       inst_nodes_offline = []
1829
1830       pnode = inst_config.primary_node
1831       pnode_img = node_image[pnode]
1832       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1833                self.ENODERPC, pnode, "instance %s, connection to"
1834                " primary node failed", instance)
1835
1836       if pnode_img.offline:
1837         inst_nodes_offline.append(pnode)
1838
1839       # If the instance is non-redundant we cannot survive losing its primary
1840       # node, so we are not N+1 compliant. On the other hand we have no disk
1841       # templates with more than one secondary so that situation is not well
1842       # supported either.
1843       # FIXME: does not support file-backed instances
1844       if not inst_config.secondary_nodes:
1845         i_non_redundant.append(instance)
1846       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1847                instance, "instance has multiple secondary nodes: %s",
1848                utils.CommaJoin(inst_config.secondary_nodes),
1849                code=self.ETYPE_WARNING)
1850
1851       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1852         i_non_a_balanced.append(instance)
1853
1854       for snode in inst_config.secondary_nodes:
1855         s_img = node_image[snode]
1856         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1857                  "instance %s, connection to secondary node failed", instance)
1858
1859         if s_img.offline:
1860           inst_nodes_offline.append(snode)
1861
1862       # warn that the instance lives on offline nodes
1863       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1864                "instance lives on offline node(s) %s",
1865                utils.CommaJoin(inst_nodes_offline))
1866       # ... or ghost nodes
1867       for node in inst_config.all_nodes:
1868         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1869                  "instance lives on ghost node %s", node)
1870
1871     feedback_fn("* Verifying orphan volumes")
1872     self._VerifyOrphanVolumes(node_vol_should, node_image)
1873
1874     feedback_fn("* Verifying oprhan instances")
1875     self._VerifyOrphanInstances(instancelist, node_image)
1876
1877     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1878       feedback_fn("* Verifying N+1 Memory redundancy")
1879       self._VerifyNPlusOneMemory(node_image, instanceinfo)
1880
1881     feedback_fn("* Other Notes")
1882     if i_non_redundant:
1883       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1884                   % len(i_non_redundant))
1885
1886     if i_non_a_balanced:
1887       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1888                   % len(i_non_a_balanced))
1889
1890     if n_offline:
1891       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1892
1893     if n_drained:
1894       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1895
1896     return not self.bad
1897
1898   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1899     """Analyze the post-hooks' result
1900
1901     This method analyses the hook result, handles it, and sends some
1902     nicely-formatted feedback back to the user.
1903
1904     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1905         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1906     @param hooks_results: the results of the multi-node hooks rpc call
1907     @param feedback_fn: function used send feedback back to the caller
1908     @param lu_result: previous Exec result
1909     @return: the new Exec result, based on the previous result
1910         and hook results
1911
1912     """
1913     # We only really run POST phase hooks, and are only interested in
1914     # their results
1915     if phase == constants.HOOKS_PHASE_POST:
1916       # Used to change hooks' output to proper indentation
1917       indent_re = re.compile('^', re.M)
1918       feedback_fn("* Hooks Results")
1919       assert hooks_results, "invalid result from hooks"
1920
1921       for node_name in hooks_results:
1922         res = hooks_results[node_name]
1923         msg = res.fail_msg
1924         test = msg and not res.offline
1925         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926                       "Communication failure in hooks execution: %s", msg)
1927         if res.offline or msg:
1928           # No need to investigate payload if node is offline or gave an error.
1929           # override manually lu_result here as _ErrorIf only
1930           # overrides self.bad
1931           lu_result = 1
1932           continue
1933         for script, hkr, output in res.payload:
1934           test = hkr == constants.HKR_FAIL
1935           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1936                         "Script %s failed, output:", script)
1937           if test:
1938             output = indent_re.sub('      ', output)
1939             feedback_fn("%s" % output)
1940             lu_result = 0
1941
1942       return lu_result
1943
1944
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed: all nodes and all instances.

    The share flag is set to 1 for every locking level.

    """
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances that are marked up and whose disk template is in
    L{constants.DTS_NET_MIRROR} are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    # the tuple shares the three containers filled in below
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # map every expected (node, volume) pair to the instance owning it
    nv_dict = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, vol_list in lvs_by_node.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # volumes that were found are removed from the expected set
        owner = nv_dict.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2026
2027
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    With an explicit instance list only those instances are locked and the
    node locks are computed later (see L{DeclareLocks}); with an empty list
    all nodes and instances are locked. The share flag is set to 1 for
    every locking level.

    @raise errors.OpPrereqError: if C{self.op.instances} is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # None means "all instances"; resolved in CheckPrereq
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Compute the node locks when an explicit instance list was given.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if the size of any (recursively visited) first child
        was changed, False otherwise

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The disks are grouped by the primary node of their instance, the node
    is asked for the actual sizes and the configuration is updated for
    every disk whose recorded size differs.

    @param feedback_fn: function passed on to the configuration update
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one per
        correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # NOTE(review): the sizes are read from the payload, like every other
      # RPC result in this module; this assumes the payload is the list of
      # sizes, one per requested disk - confirm against the RPC definition
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # divide by 2**20 (presumably bytes to mebibytes, the unit of
        # disk.size - verify against the node daemon)
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2146
2147
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-phase node list holding only
        the master node, post-phase node list holding all nodes)

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    master = self.cfg.GetMasterNode()
    return env, [master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the resolved IP is kept in C{self.ip} and the
    resolved name replaces C{self.op.name}.

    @raise errors.OpPrereqError: if neither the name nor the IP would
        change, or if the new IP already answers on the node daemon port

    """
    hostinfo = utils.GetHostInfo(self.op.name)

    new_name = hostinfo.name
    new_ip = hostinfo.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if new_name == old_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new IP must not be in use already
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts file
    are updated and distributed, and the master role is started again even
    if one of these steps failed.

    @param feedback_fn: function passed on to the configuration update

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)

      # the file is copied to every node except the master
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)

      upload = self.rpc.call_upload_file(targets,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload.iteritems():
        failure = to_result.fail_msg
        if not failure:
          continue
        warning = ("Copy of file %s to node %s failed: %s" %
                   (constants.SSH_KNOWN_HOSTS_FILE, to_node, failure))
        self.proc.LogWarning(warning)

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_msg = start_result.fail_msg
      if start_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", start_msg)
2230
2231
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or anything below it, is an LVM device.

  The children are searched first (depth-first); only if none of them
  matches is the disk's own type looked at.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  # "or ()" covers both a missing (None) and an empty children list
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
2246
2247
2248 class LUSetClusterParams(LogicalUnit):
2249   """Change the parameters of the cluster.
2250
2251   """
2252   HPATH = "cluster-modify"
2253   HTYPE = constants.HTYPE_CLUSTER
2254   _OP_REQP = []
2255   REQ_BGL = False
2256
2257   def CheckArguments(self):
2258     """Check parameters
2259
2260     """
2261     for attr in ["candidate_pool_size",
2262                  "uid_pool", "add_uids", "remove_uids"]:
2263       if not hasattr(self.op, attr):
2264         setattr(self.op, attr, None)
2265
2266     if self.op.candidate_pool_size is not None:
2267       try:
2268         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2269       except (ValueError, TypeError), err:
2270         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2271                                    str(err), errors.ECODE_INVAL)
2272       if self.op.candidate_pool_size < 1:
2273         raise errors.OpPrereqError("At least one master candidate needed",
2274                                    errors.ECODE_INVAL)
2275
2276     _CheckBooleanOpField(self.op, "maintain_node_health")
2277
2278     if self.op.uid_pool:
2279       uidpool.CheckUidPool(self.op.uid_pool)
2280
2281     if self.op.add_uids:
2282       uidpool.CheckUidPool(self.op.add_uids)
2283
2284     if self.op.remove_uids:
2285       uidpool.CheckUidPool(self.op.remove_uids)
2286
2287   def ExpandNames(self):
2288     # FIXME: in the future maybe other cluster params won't require checking on
2289     # all nodes to be modified.
2290     self.needed_locks = {
2291       locking.LEVEL_NODE: locking.ALL_SET,
2292     }
2293     self.share_locks[locking.LEVEL_NODE] = 1
2294
2295   def BuildHooksEnv(self):
2296     """Build hooks env.
2297
2298     """
2299     env = {
2300       "OP_TARGET": self.cfg.GetClusterName(),
2301       "NEW_VG_NAME": self.op.vg_name,
2302       }
2303     mn = self.cfg.GetMasterNode()
2304     return env, [mn], [mn]
2305
2306   def CheckPrereq(self):
2307     """Check prerequisites.
2308
2309     This checks whether the given params don't conflict and
2310     if the given volume group is valid.
2311
2312     """
2313     if self.op.vg_name is not None and not self.op.vg_name:
2314       instances = self.cfg.GetAllInstancesInfo().values()
2315       for inst in instances:
2316         for disk in inst.disks:
2317           if _RecursiveCheckIfLVMBased(disk):
2318             raise errors.OpPrereqError("Cannot disable lvm storage while"
2319                                        " lvm-based instances exist",
2320                                        errors.ECODE_INVAL)
2321
2322     node_list = self.acquired_locks[locking.LEVEL_NODE]
2323
2324     # if vg_name not None, checks given volume group on all nodes
2325     if self.op.vg_name:
2326       vglist = self.rpc.call_vg_list(node_list)
2327       for node in node_list:
2328         msg = vglist[node].fail_msg
2329         if msg:
2330           # ignoring down node
2331           self.LogWarning("Error while gathering data on node %s"
2332                           " (ignoring node): %s", node, msg)
2333           continue
2334         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2335                                               self.op.vg_name,
2336                                               constants.MIN_VG_SIZE)
2337         if vgstatus:
2338           raise errors.OpPrereqError("Error on node '%s': %s" %
2339                                      (node, vgstatus), errors.ECODE_ENVIRON)
2340
2341     self.cluster = cluster = self.cfg.GetClusterInfo()
2342     # validate params changes
2343     if self.op.beparams:
2344       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2345       self.new_beparams = objects.FillDict(
2346         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2347
2348     if self.op.nicparams:
2349       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2350       self.new_nicparams = objects.FillDict(
2351         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2352       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2353       nic_errors = []
2354
2355       # check all instances for consistency
2356       for instance in self.cfg.GetAllInstancesInfo().values():
2357         for nic_idx, nic in enumerate(instance.nics):
2358           params_copy = copy.deepcopy(nic.nicparams)
2359           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2360
2361           # check parameter syntax
2362           try:
2363             objects.NIC.CheckParameterSyntax(params_filled)
2364           except errors.ConfigurationError, err:
2365             nic_errors.append("Instance %s, nic/%d: %s" %
2366                               (instance.name, nic_idx, err))
2367
2368           # if we're moving instances to routed, check that they have an ip
2369           target_mode = params_filled[constants.NIC_MODE]
2370           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2371             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2372                               (instance.name, nic_idx))
2373       if nic_errors:
2374         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2375                                    "\n".join(nic_errors))
2376
2377     # hypervisor list/parameters
2378     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2379     if self.op.hvparams:
2380       if not isinstance(self.op.hvparams, dict):
2381         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2382                                    errors.ECODE_INVAL)
2383       for hv_name, hv_dict in self.op.hvparams.items():
2384         if hv_name not in self.new_hvparams:
2385           self.new_hvparams[hv_name] = hv_dict
2386         else:
2387           self.new_hvparams[hv_name].update(hv_dict)
2388
2389     # os hypervisor parameters
2390     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2391     if self.op.os_hvp:
2392       if not isinstance(self.op.os_hvp, dict):
2393         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2394                                    errors.ECODE_INVAL)
2395       for os_name, hvs in self.op.os_hvp.items():
2396         if not isinstance(hvs, dict):
2397           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2398                                       " input"), errors.ECODE_INVAL)
2399         if os_name not in self.new_os_hvp:
2400           self.new_os_hvp[os_name] = hvs
2401         else:
2402           for hv_name, hv_dict in hvs.items():
2403             if hv_name not in self.new_os_hvp[os_name]:
2404               self.new_os_hvp[os_name][hv_name] = hv_dict
2405             else:
2406               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2407
2408     # changes to the hypervisor list
2409     if self.op.enabled_hypervisors is not None:
2410       self.hv_list = self.op.enabled_hypervisors
2411       if not self.hv_list:
2412         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2413                                    " least one member",
2414                                    errors.ECODE_INVAL)
2415       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2416       if invalid_hvs:
2417         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2418                                    " entries: %s" %
2419                                    utils.CommaJoin(invalid_hvs),
2420                                    errors.ECODE_INVAL)
2421       for hv in self.hv_list:
2422         # if the hypervisor doesn't already exist in the cluster
2423         # hvparams, we initialize it to empty, and then (in both
2424         # cases) we make sure to fill the defaults, as we might not
2425         # have a complete defaults list if the hypervisor wasn't
2426         # enabled before
2427         if hv not in new_hvp:
2428           new_hvp[hv] = {}
2429         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2430         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2431     else:
2432       self.hv_list = cluster.enabled_hypervisors
2433
2434     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2435       # either the enabled list has changed, or the parameters have, validate
2436       for hv_name, hv_params in self.new_hvparams.items():
2437         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2438             (self.op.enabled_hypervisors and
2439              hv_name in self.op.enabled_hypervisors)):
2440           # either this is a new hypervisor, or its parameters have changed
2441           hv_class = hypervisor.GetHypervisor(hv_name)
2442           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2443           hv_class.CheckParameterSyntax(hv_params)
2444           _CheckHVParams(self, node_list, hv_name, hv_params)
2445
2446     if self.op.os_hvp:
2447       # no need to check any newly-enabled hypervisors, since the
2448       # defaults have already been checked in the above code-block
2449       for os_name, os_hvp in self.new_os_hvp.items():
2450         for hv_name, hv_params in os_hvp.items():
2451           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2452           # we need to fill in the new os_hvp on top of the actual hv_p
2453           cluster_defaults = self.new_hvparams.get(hv_name, {})
2454           new_osp = objects.FillDict(cluster_defaults, hv_params)
2455           hv_class = hypervisor.GetHypervisor(hv_name)
2456           hv_class.CheckParameterSyntax(new_osp)
2457           _CheckHVParams(self, node_list, hv_name, new_osp)
2458
2459
2460   def Exec(self, feedback_fn):
2461     """Change the parameters of the cluster.
2462
2463     """
2464     if self.op.vg_name is not None:
2465       new_volume = self.op.vg_name
2466       if not new_volume:
2467         new_volume = None
2468       if new_volume != self.cfg.GetVGName():
2469         self.cfg.SetVGName(new_volume)
2470       else:
2471         feedback_fn("Cluster LVM configuration already in desired"
2472                     " state, not changing")
2473     if self.op.hvparams:
2474       self.cluster.hvparams = self.new_hvparams
2475     if self.op.os_hvp:
2476       self.cluster.os_hvp = self.new_os_hvp
2477     if self.op.enabled_hypervisors is not None:
2478       self.cluster.hvparams = self.new_hvparams
2479       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2480     if self.op.beparams:
2481       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2482     if self.op.nicparams:
2483       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2484
2485     if self.op.candidate_pool_size is not None:
2486       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2487       # we need to update the pool size here, otherwise the save will fail
2488       _AdjustCandidatePool(self, [])
2489
2490     if self.op.maintain_node_health is not None:
2491       self.cluster.maintain_node_health = self.op.maintain_node_health
2492
2493     if self.op.add_uids is not None:
2494       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2495
2496     if self.op.remove_uids is not None:
2497       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2498
2499     if self.op.uid_pool is not None:
2500       self.cluster.uid_pool = self.op.uid_pool
2501
2502     self.cfg.Update(self.cluster, feedback_fn)
2503
2504
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the non-config cluster files to the other nodes.

  The configuration and ssconf files are distributed by ConfigWriter;
  this function uploads the remaining files (hosts file, SSH known
  hosts, RAPI certificate and users, confd HMAC key and whatever the
  enabled hypervisors report as ancillary files) to all online nodes
  except the master.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  cfg = lu.cfg

  # 1. Work out the target nodes: online nodes plus the extra ones,
  # minus the master node itself
  master = cfg.GetNodeInfo(cfg.GetMasterNode())
  targets = cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master.name)
  except ValueError:
    # the master was not among the targets, nothing to drop
    pass

  # 2. Work out the files to send
  files = set([constants.ETC_HOSTS,
               constants.SSH_KNOWN_HOSTS_FILE,
               constants.RAPI_CERT_FILE,
               constants.RAPI_USERS_FILE,
               constants.CONFD_HMAC_KEY,
              ])
  for hv_name in cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Upload every file that exists locally; failures are only warned
  # about, they do not abort the distribution
  for fname in files:
    if not os.path.exists(fname):
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2547
2548
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU takes no opcode parameters: it re-saves the cluster object
  and then re-sends the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes, in shared mode.

    """
    node_level = locking.LEVEL_NODE
    self.needed_locks = {node_level: locking.ALL_SET}
    self.share_locks[node_level] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2575
2576
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of all the instance's disks is requested from the
  primary node in a loop, until no disk reports a sync percentage any
  more (or after a single successful poll if C{oneshot} is set).

  @param lu: the calling logical unit (used for config, RPC and logging)
  @param instance: the instance whose disks should be checked
  @param oneshot: if True, do not wait for the sync to finish, just
      report the current state
  @rtype: boolean
  @return: True if, in the last poll, no disk was degraded without
      being in resync; False otherwise
  @raise errors.RemoteError: if the node failed to answer ten times
      in a row

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful call resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[i].iv_name)
        continue

      # only devices that are degraded and not resyncing count here
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          # keep the longest estimate over all disks, not just the one
          # of the last disk which happens to be syncing
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the longest estimate, but poll at least once a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2648
2649
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The device is looked up on the node (unless this is a secondary
  node on which the device is not assembled) and then its children
  are checked recursively; as soon as one check fails, the remaining
  children are not queried any more.

  @param lu: the calling logical unit
  @param dev: the disk to check
  @param node: the node on which to check
  @param on_primary: whether the node is the primary node
  @param ldisk: if True, test the local storage status (ldisk_status)
      instead of the is_degraded attribute (which represents overall
      non-ok status for the device(s)); not passed on to the children
  @return: whether the device and its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # already failed, no need to query the other children
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
2682
2683
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the arguments; no locks are requested.

    @raise errors.OpPrereqError: if a list of names was given, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and the RPC results of
        the OS diagnose call as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # first time we see this OS: start with an empty list for
          # every node which answered, so that nodes lacking the OS
          # are still present in the result
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: unused here
    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        # an OS is valid only if, on every node which answered, its
        # first entry exists and has a true status
        valid = True
        variants = None
        for osl in os_data.values():
          # force a real boolean: without it an empty per-node list
          # (or any other false value) would be returned as the value
          # of the "valid" field
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # the reported variants are those common to all nodes
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2797
2798
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is taken out of both returned node lists;
    hooks must not run on it in the pre phase, as a failed node would
    then be impossible to remove.

    @return: tuple of (environment dict, node list, node list)

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - it does not have primary or secondary instances

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    self.op.node_name = _ExpandNodeName(cfg, self.op.node_name)
    node = cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_names = cfg.GetInstanceList()

    if node.name == cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_names:
      if node.name in cfg.GetInstanceInfo(instance_name).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)

    # remember the node object for Exec
    self.node = node
    self.op.node_name = node.name

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures of the post hooks on the removed node and of the
    leave-cluster call are only reported as warnings.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_err = leave_result.fail_msg
    if leave_err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_err)
2885
2886
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly from the attribute of the same name
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*([
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS)
    )

  # (output field, key in the node info payload) pairs for the live
  # values which are converted to integers
  _LIVE_INT_KEYS = (
    ("mtotal", 'memory_total'),
    ("mnode", 'memory_dom0'),
    ("mfree", 'memory_free'),
    ("dtotal", 'vg_size'),
    ("dfree", 'vg_free'),
    ("ctotal", 'cpu_total'),
    ("cnodes", 'cpu_nodes'),
    ("csockets", 'cpu_sockets'),
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks are only requested if non-static fields were selected
    and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def _GatherLiveData(self, nodenames):
    """Ask the nodes for the values of the dynamic fields.

    @param nodenames: names of the nodes to query
    @rtype: dict
    @return: map of node name to a dict of field values; the inner
        dict is empty for nodes whose call failed or gave no payload

    """
    node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        self.cfg.GetHypervisorType())
    gathered = {}
    for name in nodenames:
      answer = node_data[name]
      if answer.fail_msg or not answer.payload:
        gathered[name] = {}
        continue
      payload = answer.payload
      values = {"bootid": payload.get('bootid', None)}
      for field, key in self._LIVE_INT_KEYS:
        values[field] = utils.TryConvert(int, payload.get(key, None))
      gathered[name] = values
    return gathered

  def _FieldValue(self, field, node, master_node, live_data,
                  node_to_primary, node_to_secondary):
    """Compute the value of a single output field for one node.

    @param field: name of the output field
    @param node: the node object
    @param master_node: name of the master node
    @param live_data: map of node name to dict of dynamic field values
    @param node_to_primary: map of node name to set of names of the
        instances having the node as primary
    @param node_to_secondary: map of node name to set of names of the
        instances having the node as secondary
    @raise errors.ParameterError: for an unknown field

    """
    if field in self._SIMPLE_FIELDS:
      return getattr(node, field)
    if field == "pinst_list":
      return list(node_to_primary[node.name])
    if field == "sinst_list":
      return list(node_to_secondary[node.name])
    if field == "pinst_cnt":
      return len(node_to_primary[node.name])
    if field == "sinst_cnt":
      return len(node_to_secondary[node.name])
    if field == "pip":
      return node.primary_ip
    if field == "sip":
      return node.secondary_ip
    if field == "tags":
      return list(node.GetTags())
    if field == "master":
      return node.name == master_node
    if self._FIELDS_DYNAMIC.Matches(field):
      return live_data[node.name].get(field, None)
    if field == "role":
      # the first matching state wins
      if node.name == master_node:
        return "M"
      if node.master_candidate:
        return "C"
      if node.drained:
        return "D"
      if node.offline:
        return "O"
      return "R"
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @rtype: list
    @return: one row per node (in NiceSort order of the node names),
        each row holding the values of the requested output fields

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      # without locks, nodes can disappear after ExpandNames
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = self._GatherLiveData(nodenames)
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = {}
    node_to_secondary = {}
    for name in nodenames:
      node_to_primary[name] = set()
      node_to_secondary[name] = set()

    # the instance data is only read if instance fields were requested
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        node_output.append(self._FieldValue(field, node, master_node,
                                            live_data, node_to_primary,
                                            node_to_secondary))
      output.append(node_output)

    return output
3046
3047
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  # output fields copied as-is from the given key of a volume entry
  _VOLUME_KEYS = {
    "phys": 'dev',
    "vg": 'vg',
    "name": 'name',
    }

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All nodes are locked if no node names were given.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes for which locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  @staticmethod
  def _OwnerOf(vol, node, ilist, lv_by_node):
    """Find the instance using a volume on a node.

    @param vol: the volume entry, as returned by the node
    @param node: the name of the node holding the volume
    @param ilist: the list of instance objects
    @param lv_by_node: map of instance object to its per-node LV map
    @return: the name of the first instance listing the volume on
        that node, or '-' if there is none

    """
    for inst in ilist:
      lv_map = lv_by_node[inst]
      if node in lv_map and vol['name'] in lv_map[node]:
        return inst.name
    return '-'

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes which failed the call
    are skipped with a warning.

    @rtype: list
    @return: one row per volume, each row holding the requested output
        fields converted to strings

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = []
    for iname in self.cfg.GetInstanceList():
      ilist.append(self.cfg.GetInstanceInfo(iname))

    lv_by_node = {}
    for inst in ilist:
      lv_by_node[inst] = inst.MapLVsByNode()

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      # work on a sorted copy, ordered by physical device
      for vol in sorted(nresult.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in self._VOLUME_KEYS:
            val = vol[self._VOLUME_KEYS[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = self._OwnerOf(vol, node, ilist, lv_by_node)
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3132
3133
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    """Validate the storage type and the requested output fields.

    """
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode parameter to None and
    records the nodes for which locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes which failed the call
    are skipped with a warning.

    @rtype: list
    @return: one row per storage unit, sorted by node and then by unit
        name, each row holding the requested output fields

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    # position of each field in the rows returned by the nodes
    field_idx = {}
    for idx, name in enumerate(fields):
      field_idx[name] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # index the rows by unit name
      rows = {}
      for row in nresult.payload:
        rows[row[name_idx]] = row

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3226
3227
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    # the expanded name must be stored back on the opcode, since the
    # locking declaration and the RPC call below read self.op.node_name
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    """Request the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type supports modifications and that
    only modifiable fields are being changed.

    @raise errors.OpPrereqError: if either check fails

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # any requested change outside of the modifiable set is an error
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    The result's Raise method is invoked with a failure message
    naming the storage unit and the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3275
3276
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  Besides adding a brand new node, this LU also handles re-adding a node
  that is already present in the configuration (when C{self.op.readd}
  is set).

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    """Normalizes the node name passed in the opcode.

    """
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    Note that the environment uses C{self.op.primary_ip} and
    C{self.op.secondary_ip}, both of which are filled in by L{CheckPrereq}.

    @return: tuple of (environment dict, node list for the "before" run,
        node list for the "after" run)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is in the config)
     - it is resolvable
     - its IP addresses do not conflict with those of other nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable via TCP ping on the node daemon port

    As a side effect this sets C{self.op.primary_ip},
    C{self.op.secondary_ip}, C{self.changed_primary_ip},
    C{self.master_candidate} and C{self.new_node}, which are used by
    L{BuildHooksEnv} and L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    # from here on the resolved name is used, and the primary IP is always
    # the one returned by the name lookup
    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # the secondary IP is optional; without it the node is treated as
    # single-homed (secondary == primary)
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    # a plain add requires an unknown node, a re-add requires a known one
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # compare the new node's addresses against every configured node
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # on re-add the secondary IP must be unchanged; a changed primary
        # IP is only recorded here and applied later in Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # neither of the new addresses may be in use (as primary or
      # secondary) by another node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # for a re-add the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    # re-adds reuse the existing node object, plain adds build a new one
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are, in order: reset the node flags (re-add only), check
    the protocol version of the node, optionally copy the ssh keys and
    update /etc/hosts, verify the secondary IP and the ssh/hostname
    setup, and finally register the node with the context.

    @raise errors.OpExecError: on protocol version mismatch, if the node
        does not own the given secondary IP or if the ssh/hostname
        verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # the order of this list defines the order of the arguments of the
      # node_add call below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts (only if the cluster is configured to
    # modify that file)
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # for dual-homed nodes, ask the node itself whether it owns the
    # secondary IP
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # have the master node verify the new node (NV_NODELIST check)
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # a non-empty payload is reported entry by entry and then treated
      # as a failure
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        # a failed demotion is only logged, it does not abort the re-add
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      # the node is not in the configuration yet, so it has to be named
      # explicitly for the file distribution
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3505
3506
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The parameters handled here are the C{master_candidate}, C{offline}
  and C{drained} flags; each of them is tri-state in the opcode, with
  C{None} meaning "no change requested".

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested modifications and derives helper flags.

    At least one of the three flags must be given and at most one of
    them may be set to True. The attributes C{offline_or_drain},
    C{deoffline_or_drain}, C{might_demote} and C{lock_all} computed here
    are used by the other methods of this LU.

    @raise errors.OpPrereqError: if no modification, or more than one
        state at once, was requested

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # NOTE(review): presumably these calls validate that each field is
    # either unset or a boolean - confirm against _CheckBooleanOpField
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # The flags are compared explicitly against True/False since None
    # means "not requested" and must match neither.
    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # all nodes are locked only if a demotion is possible and
    # auto-promotion was requested
    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    """Declares the node locks: all nodes or just the target node.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the node being modified.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag changes are allowed:
     - the flags of the master node cannot be changed
     - without having locked all nodes, a possible demotion must leave
       enough master candidates
     - an offline or drained node cannot be made a master candidate,
       unless the same request clears that state

    If the node is being de-offlined or un-drained, this also decides
    whether it should promote itself to master candidate, storing the
    decision in C{self.op.master_candidate}.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    # promotion is refused while the node is (and stays) offline or drained
    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    The flags are applied in the order offline, master_candidate,
    drained; setting a node offline or drained also demotes it from
    master candidate and clears the other of the two states.

    @rtype: list
    @return: list of (parameter name, new value or explanation) pairs
        describing the changes made

    """
    node = self.node

    result = []
    # tracks whether the master candidate flag was touched
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        # a failure of the node to demote itself is only logged
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3668
3669
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter is not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = self.op.node_name == self.cfg.GetMasterNode()
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option that must not block on other
    jobs, hence no locks are requested.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC result

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3709
3710
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Requests no locks at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version constants, the architecture of the machine running
        this code and a selection of cluster-level settings; the
        hypervisor parameters (both global and per-OS) are restricted to
        the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()

    # Filter just for enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in cluster.enabled_hypervisors))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict((hv_name, cluster.hvparams[hv_name])
                       for hv_name in cluster.enabled_hypervisors),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }
3770
3771
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Requests no locks and validates the selected output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def _GetFieldValue(self, field):
    """Computes the value of a single output field.

    @param field: the name of the requested field
    @raise errors.ParameterError: if the field name is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      # reported as the mere existence of the drain file
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Collects the values of the requested configuration fields.

    @rtype: list
    @return: one value per entry of C{self.op.output_fields}, in the
        same order

    """
    return [self._GetFieldValue(field) for field in self.op.output_fields]
3813
3814
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; the node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and hands its
    primary node to the node-online check. A missing C{ignore_size}
    opcode attribute is defaulted to False.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by the disk assembly
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (assembled, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if assembled:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
3855
3856
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first on
  every node with is_primary=False, then once more on the primary node
  only, with is_primary=True.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a (disks_ok, device_info) pair; disks_ok is False if an
      assembly call failed (first pass failures are not counted when
      ignore_secondaries is true), and device_info is a list of
      (primary_node, instance_visible_name, device_path) tuples, one per
      instance disk, where device_path is None unless the assembly on
      the primary node succeeded

  """
  primary_node = instance.primary_node
  instance_name = instance.name
  all_ok = True
  mappings = []

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for disk in instance.disks:
    for (node, node_disk) in disk.ComputeNodeTree(primary_node):
      if ignore_size:
        # work on a copy, so that the size of the original disk object
        # is left alone
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      rpc_result = lu.rpc.call_blockdev_assemble(node, node_disk,
                                                 instance_name, False)
      failure = rpc_result.fail_msg
      if not failure:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         disk.iv_name, node, failure)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for disk in instance.disks:
    device_path = None

    for (node, node_disk) in disk.ComputeNodeTree(primary_node):
      if node != primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      rpc_result = lu.rpc.call_blockdev_assemble(node, node_disk,
                                                 instance_name, True)
      failure = rpc_result.fail_msg
      if failure:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           disk.iv_name, node, failure)
        all_ok = False
      else:
        device_path = rpc_result.payload

    mappings.append((primary_node, disk.iv_name, device_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, primary_node)

  return all_ok, mappings
3939
3940
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled; should that fail, they are shut down again
  and the failure is reported through an exception.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed on as C{ignore_secondaries} to the disk assembly;
      if it is false but not None, a hint about '--force' is logged on
      failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  show_hint = force is not None and not force
  if show_hint:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3954
3955
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; the node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = found = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert found is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3988
3989
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, unless it is running.

  The instance-down check is performed first, and only afterwards is
  L{_ShutdownInstanceDisks} invoked; the result of the latter is not
  returned to the caller.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)
3999
4000
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are logged but
  do not influence the return value; errors on any other node always
  do.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param ignore_primary: whether to disregard failures on the primary
      node
  @rtype: boolean
  @return: False if a failure that is not ignored happened, True
      otherwise

  """
  success = True
  for disk in instance.disks:
    for (node, node_disk) in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(node_disk, node)
      failure = lu.rpc.call_blockdev_shutdown(node, node_disk).fail_msg
      if not failure:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, failure)
      if not ignore_primary or node != instance.primary_node:
        success = False
  return success
4022
4023
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its current state; if it reports less free
  memory than requested, or if the information cannot be obtained (or
  is not an integer), an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  all_info = lu.rpc.call_node_info([node], lu.cfg.GetVGName(),
                                   hypervisor_name)
  node_result = all_info[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    details = (node, reason, requested, free_mem)
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" % details,
                               errors.ECODE_NORES)
4059
4060
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  All nodes are queried with a single call and then examined in the
  given order; the first node that cannot be queried, reports a
  non-integer value or has less free space than requested triggers an
  OpPrereqError exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  all_info = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for name in nodenames:
    node_result = all_info[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    free_space = node_result.payload.get("vg_free", None)
    if not isinstance(free_space, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (name, free_space),
                                 errors.ECODE_ENVIRON)

    if requested > free_space:
      details = (name, requested, free_space)
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 details,
                                 errors.ECODE_NORES)
4095
4096
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  The opcode may carry optional one-off C{beparams} and C{hvparams}
  overrides; these are validated in L{CheckPrereq} and passed to the
  instance start RPC in L{Exec}.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional one-off backend and hypervisor parameters, verifies that the
    primary node is online and that the instance's bridges exist and,
    if the instance is not already running, that the primary node has
    enough free memory to start it.

    @raise errors.OpPrereqError: if the one-off parameters are not
        dictionaries, or if one of the checks above fails

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    cluster = self.cfg.GetClusterInfo()

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      # the one-off values take precedence over the configured ones
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    # Work on a copy so that merging the one-off values can never leak
    # into the configuration data
    bep = dict(cluster.FillBE(instance))
    if self.beparams:
      # the instance will be started with the one-off values, so these
      # (and not only the configured ones) must drive the memory check
      bep.update(self.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration before its disks
    are activated and the start RPC is sent; if the start fails the disks
    are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
4197
4198
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are done through a single reboot RPC on the
  primary node; any other (i.e. full) reboot is done as an instance
  shutdown, a restart of the instance's disks and an instance start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout, using the default if none was given.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    valid_types = [constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL]
    if self.op.reboot_type not in valid_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(valid_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    # the same node list is used for the pre and the post phase
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that its bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def _RebootInPlace(self, inst, pnode):
    """Do a soft or hard reboot via the reboot RPC.

    """
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, pnode)
    result = self.rpc.call_instance_reboot(pnode, inst, self.op.reboot_type,
                                           self.shutdown_timeout)
    result.Raise("Could not reboot instance")

  def _RebootFull(self, inst, pnode):
    """Do a full reboot: shutdown, disk restart and instance start.

    """
    result = self.rpc.call_instance_shutdown(pnode, inst,
                                             self.shutdown_timeout)
    result.Raise("Could not shutdown instance for full reboot")

    _ShutdownInstanceDisks(self, inst)
    _StartInstanceDisks(self, inst, self.op.ignore_secondaries)

    result = self.rpc.call_instance_start(pnode, inst, None, None)
    start_error = result.fail_msg
    if start_error:
      # don't leave the disks active for an instance which is not running
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance for full reboot: %s" %
                               start_error)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    After a successful reboot the instance is marked as up in the
    configuration.

    """
    inst = self.instance
    pnode = inst.primary_node
    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if self.op.reboot_type not in in_place_types:
      self._RebootFull(inst, pnode)
    else:
      self._RebootInPlace(inst, pnode)

    self.cfg.MarkInstanceUp(inst.name)
4287
4288
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout, using the default if none was given.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", default_timeout)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    # the same node list is used for the pre and the post phase
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first; a failure
    of the shutdown RPC is only logged as a warning and the instance's
    disks are shut down in any case.

    """
    inst = self.instance
    pnode = inst.primary_node
    shutdown_timeout = self.timeout

    self.cfg.MarkInstanceDown(inst.name)

    result = self.rpc.call_instance_shutdown(pnode, inst, shutdown_timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4344
4345
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  Optionally the OS of the instance is changed before the OS create
  scripts are run.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    # the same node list is used for the pre and the post phase
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks and is
    not running; if a new OS was requested, it is verified against the
    primary node.

    """
    inst_name = self.op.instance_name
    inst = self.cfg.GetInstanceInfo(inst_name)
    assert inst is not None, "Cannot retrieve locked instance %s" % inst_name

    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" % inst_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # both parameters are optional in the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)

    new_os = self.op.os_type
    if new_os is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, new_os, self.op.force_variant)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    The instance's disks are activated for the duration of the OS
    installation and shut down afterwards, even if the installation
    fails.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      pnode = instance.primary_node
      result = self.rpc.call_instance_os_add(pnode, instance, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
4415
4416
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} parameter must be a list of non-negative integers.

    """
    disks = self.op.disks
    if not isinstance(disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)

    for item in disks:
      if isinstance(item, int) and item >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    # the same node list is used for the pre and the post phase
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks and is
    not running, and that all passed disk indices exist. If no indices
    were passed, all disks of the instance are selected.

    """
    inst_name = self.op.instance_name
    inst = self.cfg.GetInstanceInfo(inst_name)
    assert inst is not None, "Cannot retrieve locked instance %s" % inst_name

    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" % inst_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    disk_count = len(inst.disks)
    if self.op.disks:
      for idx in self.op.disks:
        if idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # nothing selected means all disks
      self.op.disks = range(disk_count)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    selected = self.op.disks
    # every disk whose index has not been passed in is left alone
    to_skip = [idx for (idx, _) in enumerate(self.instance.disks)
               if idx not in selected]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4488
4489
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    # the same node list is used for the pre and the post phase
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the new name is not already used by another instance and,
    unless C{ignore_ip} is set, that the IP of the new name doesn't
    answer on the node daemon port.

    """
    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    inst = self.cfg.GetInstanceInfo(full_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host_info = utils.GetHostInfo(self.op.new_name)
    resolved_name = host_info.name
    self.op.new_name = resolved_name

    if resolved_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 resolved_name, errors.ECODE_EXISTS)

    if getattr(self.op, "ignore_ip", False):
      return

    if utils.TcpPing(host_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, resolved_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration first; afterwards the
    file storage directory (for file-based instances) is renamed and the
    OS rename script is run. A failure of the OS rename script is only
    logged as a warning.

    """
    instance = self.instance
    old_name = instance.name
    new_name = self.op.new_name

    if instance.disk_template == constants.DT_FILE:
      # must be computed before the rename changes the configuration
      old_file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    instance = self.cfg.GetInstanceInfo(new_name)
    pnode = instance.primary_node

    if instance.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(pnode,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (pnode, old_file_storage_dir, new_file_storage_dir))

    _StartInstanceDisks(self, instance, None)
    try:
      result = self.rpc.call_instance_run_rename(pnode, instance, old_name,
                                                 self.op.debug_level)
      failure = result.fail_msg
      if failure:
        self.proc.LogWarning("Could not run OS rename script for instance %s"
                             " on node %s (but the instance has been renamed"
                             " in Ganeti): %s" %
                             (instance.name, pnode, failure))
    finally:
      _ShutdownInstanceDisks(self, instance)
4579
4580
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout, using the default if none was given.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # the node locks are computed later, in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-phase node list contains only the master node, the
    post-phase list the nodes of the instance followed by the master.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout

    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + pre_nodes
    return hook_env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and finally it is
    dropped from the configuration. With C{ignore_failures} set, errors
    in the first two steps are only reported via the feedback function.

    """
    inst = self.instance
    pnode = inst.primary_node
    ignore_failures = self.op.ignore_failures

    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    result = self.rpc.call_instance_shutdown(pnode, inst,
                                             self.shutdown_timeout)
    failure = result.fail_msg
    if failure:
      if not ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4659
4660
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  The result of L{Exec} is a list with one entry per instance, each entry
  being the list of values for the requested output fields.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  # prefixes of the per-parameter hypervisor/backend fields
  _HVPREFIX = "hv/"
  _BEPREFIX = "be/"
  # legacy fields describing the first NIC, mapped to the property name
  # used by the indexed "nic.<property>/<index>" fields
  _FIRST_NIC_FIELDS = {
    "ip": "ip",
    "mac": "mac",
    "nic_mode": "mode",
    "nic_link": "link",
    "bridge": "bridge",
    }

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # the nodes are only queried if a non-static field was requested, and
    # locks are only needed if the nodes are queried
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def _GetInstanceNames(self, all_info):
    """Compute the names of the instances to report, in output order.

    @param all_info: the result of C{cfg.GetAllInstancesInfo()}
    @return: the instance names; sorted if all instances were requested,
        otherwise in the order given by the caller
    @raise errors.OpExecError: if an explicitly requested instance is not
        available anymore

    """
    if self.do_locking:
      available = self.acquired_locks[locking.LEVEL_INSTANCE]
    else:
      available = all_info.keys()

    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      return utils.NiceSort(available)

    # caller did specify names, so we must keep the ordering
    missing = set(self.wanted).difference(available)
    if missing:
      raise errors.OpExecError("Some instances were removed before"
                               " retrieving their data: %s" % missing)
    return self.wanted

  def _GatherLiveData(self, instance_list, instance_names):
    """Collect the runtime data of the instances from their primary nodes.

    If no dynamic field was requested no RPC is made and every instance
    gets an empty runtime data dictionary.

    @return: a tuple of (live data per instance name, set of nodes whose
        query failed, set of offline nodes); offline nodes whose query
        failed are in both sets

    """
    bad_nodes = set()
    off_nodes = set()

    if not self.do_node_query:
      live_data = dict([(name, {}) for name in instance_names])
      return (live_data, bad_nodes, off_nodes)

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    live_data = {}
    node_data = self.rpc.call_all_instances_info(nodes, hv_list)
    for name in nodes:
      result = node_data[name]
      if result.offline:
        off_nodes.add(name)
      if result.fail_msg:
        bad_nodes.add(name)
      elif result.payload:
        live_data.update(result.payload)
      # else no instance is alive

    return (live_data, bad_nodes, off_nodes)

  @staticmethod
  def _GetStatus(instance, live_data, bad_nodes, off_nodes):
    """Compute the value of the "status" field for an instance.

    """
    if instance.primary_node in off_nodes:
      return "ERROR_nodeoffline"
    if instance.primary_node in bad_nodes:
      return "ERROR_nodedown"

    running = bool(live_data.get(instance.name))
    if running:
      if instance.admin_up:
        return "running"
      return "ERROR_up"

    if instance.admin_up:
      return "ERROR_down"
    return "ADMIN_down"

  @staticmethod
  def _GetDiskSize(instance, idx):
    """Return the size of the disk with the given index.

    @return: the disk size, or None if C{FindDisk} rejects the index

    """
    try:
      return instance.FindDisk(idx).size
    except errors.OpPrereqError:
      return None

  @staticmethod
  def _GetNicValue(instance, i_nicp, name, idx):
    """Return a property of the NIC with the given index.

    @param i_nicp: the filled NIC parameters, one entry per NIC
    @param name: one of "mac", "ip", "mode", "link" or "bridge"
    @return: the value, or None if the instance has no such NIC (or, for
        "bridge", if the NIC is not in bridged mode)

    """
    if idx >= len(instance.nics):
      return None

    if name == "mac":
      return instance.nics[idx].mac
    if name == "ip":
      return instance.nics[idx].ip
    if name == "mode":
      return i_nicp[idx][constants.NIC_MODE]
    if name == "link":
      return i_nicp[idx][constants.NIC_LINK]
    if name == "bridge":
      if i_nicp[idx][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
        return i_nicp[idx][constants.NIC_LINK]
      return None

    assert False, "Unhandled NIC parameter"

  def _GetDiskField(self, instance, st_groups):
    """Compute the value of a "disk.*" variable field.

    @param st_groups: the groups of the matching field regular expression

    """
    kind = st_groups[1]
    if kind == "count":
      return len(instance.disks)
    if kind == "sizes":
      return [disk.size for disk in instance.disks]
    if kind == "size":
      return self._GetDiskSize(instance, st_groups[2])

    assert False, "Unhandled disk parameter"

  def _GetNicField(self, instance, i_nicp, st_groups):
    """Compute the value of a "nic.*" variable field.

    @param i_nicp: the filled NIC parameters, one entry per NIC
    @param st_groups: the groups of the matching field regular expression

    """
    kind = st_groups[1]
    if kind == "count":
      return len(instance.nics)
    if kind == "macs":
      return [nic.mac for nic in instance.nics]
    if kind == "ips":
      return [nic.ip for nic in instance.nics]
    if kind == "modes":
      return [nicp[constants.NIC_MODE] for nicp in i_nicp]
    if kind == "links":
      return [nicp[constants.NIC_LINK] for nicp in i_nicp]
    if kind == "bridges":
      bridges = []
      for nicp in i_nicp:
        if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.append(nicp[constants.NIC_LINK])
        else:
          bridges.append(None)
      return bridges

    # index-based item
    return self._GetNicValue(instance, i_nicp, kind, int(st_groups[2]))

  def _GetFieldValue(self, field, instance, params, live):
    """Compute the value of a single output field for an instance.

    @param field: the name of the output field
    @param params: tuple of (filled hvparams, filled beparams, list of
        filled NIC parameters) of the instance
    @param live: the tuple returned by L{_GatherLiveData}

    """
    # pylint: disable-msg=R0911,R0912
    # one return statement/branch per supported field
    (i_hv, i_be, i_nicp) = params
    (live_data, bad_nodes, off_nodes) = live

    if field in self._SIMPLE_FIELDS:
      return getattr(instance, field)
    if field == "pnode":
      return instance.primary_node
    if field == "snodes":
      return list(instance.secondary_nodes)
    if field == "admin_state":
      return instance.admin_up
    if field == "oper_state":
      if instance.primary_node in bad_nodes:
        return None
      return bool(live_data.get(instance.name))
    if field == "status":
      return self._GetStatus(instance, live_data, bad_nodes, off_nodes)
    if field == "oper_ram":
      if instance.primary_node in bad_nodes:
        return None
      if instance.name in live_data:
        return live_data[instance.name].get("memory", "?")
      return "-"
    if field == "vcpus":
      return i_be[constants.BE_VCPUS]
    if field == "disk_template":
      return instance.disk_template
    if field in self._FIRST_NIC_FIELDS:
      return self._GetNicValue(instance, i_nicp,
                               self._FIRST_NIC_FIELDS[field], 0)
    if field == "sda_size" or field == "sdb_size":
      # "sda" is disk 0, "sdb" is disk 1
      return self._GetDiskSize(instance, ord(field[2]) - ord('a'))
    if field == "disk_usage": # total disk usage per node
      disk_sizes = [{'size': disk.size} for disk in instance.disks]
      return _ComputeDiskSize(instance.disk_template, disk_sizes)
    if field == "tags":
      return list(instance.GetTags())
    if field == "hvparams":
      return i_hv
    if field == "beparams":
      return i_be

    if field.startswith(self._HVPREFIX):
      hv_name = field[len(self._HVPREFIX):]
      if (hv_name in constants.HVS_PARAMETERS and
          hv_name not in constants.HVC_GLOBALS):
        return i_hv.get(hv_name, None)
    if field.startswith(self._BEPREFIX):
      be_name = field[len(self._BEPREFIX):]
      if be_name in constants.BES_PARAMETERS:
        return i_be.get(be_name, None)

    # only the variable (regular expression based) fields are left
    st_match = self._FIELDS_STATIC.Matches(field)
    if st_match and st_match.groups():
      # matches a variable list
      st_groups = st_match.groups()
      if st_groups[0] == "disk":
        return self._GetDiskField(instance, st_groups)
      if st_groups[0] == "nic":
        return self._GetNicField(instance, i_nicp, st_groups)
      assert False, ("Declared but unhandled variable parameter '%s'" %
                     field)

    assert False, "Declared but unhandled parameter '%s'" % field

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance, each entry being the
        list of the values of the requested output fields
    @raise errors.OpExecError: if an explicitly requested instance is not
        available anymore

    """
    all_info = self.cfg.GetAllInstancesInfo()
    instance_names = self._GetInstanceNames(all_info)
    instance_list = [all_info[iname] for iname in instance_names]

    live = self._GatherLiveData(instance_list, instance_names)

    cluster = self.cfg.GetClusterInfo()
    output = []
    for instance in instance_list:
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      params = (i_hv, i_be, i_nicp)
      output.append([self._GetFieldValue(field, instance, params, live)
                     for field in self.op.output_fields])

    return output
4942
4943
class LUFailoverInstance(LogicalUnit):
  """Logical unit failing an instance over to its secondary node.

  The instance is shut down on its current primary node, the first
  secondary node is recorded as the new primary in the configuration
  and, if the instance is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout.

    C{shutdown_timeout} is an optional opcode attribute; when it is
    missing, C{constants.DEFAULT_SHUTDOWN_TIMEOUT} is used instead.

    """
    timeout = getattr(self.op, "shutdown_timeout",
                      constants.DEFAULT_SHUTDOWN_TIMEOUT)
    self.shutdown_timeout = timeout

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    # the node list is left empty here and recomputed in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-hooks node list holds the master and the instance's
    secondary nodes; the post-hooks list additionally holds the
    current (old) primary node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    inst = self.instance
    old_primary = inst.primary_node
    old_secondary = inst.secondary_nodes[0]

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": old_secondary,
      "NEW_PRIMARY": old_secondary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance can be retrieved from the
    configuration, that its disk template is one of
    C{constants.DTS_NET_MIRROR}, and that the first secondary node is
    online and not drained. If the instance is marked as up, the free
    memory on that node is checked as well; finally the instance's
    bridges are checked on it.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # check memory requirements on the secondary node
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           filled_be[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existance
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Fail the instance over to its secondary node.

    The instance is shut down on its present node, its disks are
    deactivated, the configuration is updated to make the secondary
    node the primary one and, if the instance is marked as up, its
    disks are activated and it is started on the new primary.

    @param feedback_fn: function used to report progress to the caller

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if not consistent and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    stop_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                               self.shutdown_timeout)
    stop_err = stop_res.fail_msg
    if stop_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, stop_err))
      # the user asked to go on even if the source cannot be reached
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, stop_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    start_err = start_res.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
5096
5097
class LUMigrateInstance(LogicalUnit):
  """Logical unit migrating an instance.

  Unlike a failover, which shuts the instance down, a migration moves
  the instance without stopping it. The actual work is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    # the node list is left empty here and recomputed in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-hooks node list holds the master and the instance's
    secondary nodes; the post-hooks list additionally holds the
    current (old) primary node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    old_secondary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
        "MIGRATE_LIVE": self.op.live,
        "MIGRATE_CLEANUP": self.op.cleanup,
        "OLD_PRIMARY": old_primary,
        "OLD_SECONDARY": old_secondary,
        "NEW_PRIMARY": old_secondary,
        "NEW_SECONDARY": old_primary,
        })

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5147
5148
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, its disks are
  created on the target node and filled through a block device export
  from the source node; afterwards the configuration is updated, the
  source disks are removed and, if the instance is marked as up, it is
  started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Both C{shutdown_timeout} and C{ignore_consistency} are optional
    opcode attributes; they are read here with a fallback value and the
    rest of the LU only uses the copies stored on C{self}.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # ignore_consistency is not part of _OP_REQP, so the opcode may not
    # carry it; accessing self.op.ignore_consistency directly in Exec
    # would turn a failed shutdown into an AttributeError
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    # the instance's primary node is appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the
    target node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
        the same node list is used for both phases

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and the target node can be retrieved
    from the configuration, that the target node is not already the
    instance's primary node, that all disks are of type
    C{constants.LD_LV} or C{constants.LD_FILE}, and that the target
    node is online and not drained. If the instance is marked as up,
    the free memory on the target node is checked too; finally the
    instance's bridges are checked on the target node.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain (non-layered) disks can be copied by this LU
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if the instance cannot be shut down (and
        consistency is not being ignored), if the target disks cannot
        be created or filled, or if the instance cannot be started on
        the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the value computed in CheckArguments, as the opcode is not
      # guaranteed to have the attribute
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; the loop stops at the
    # first failing disk
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, so make it the primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5329
5330
class LUMigrateNode(LogicalUnit):
  """Logical unit migrating all primary instances away from a node.

  One L{TLMigrateInstance} tasklet is created for every instance
  returned by C{_GetNodePrimaryInstances} for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare locks and build the tasklets.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }
    # further nodes are appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    instance_names = []
    migraters = []
    for instance in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", instance.name)
      instance_names.append(instance.name)
      # never run in cleanup mode from here
      migraters.append(TLMigrateInstance(self, instance.name,
                                         self.op.live, False))

    self.tasklets = migraters

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The environment only carries the node name, and the node list used
    for both the pre and the post phase only contains the master node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {"NODE_NAME": self.op.node_name}
    hook_nodes = [self.cfg.GetMasterNode()]
    return (env, hook_nodes, hook_nodes)
5381
5382
5383 class TLMigrateInstance(Tasklet):
5384   def __init__(self, lu, instance_name, live, cleanup):
5385     """Initializes this class.
5386
5387     """
5388     Tasklet.__init__(self, lu)
5389
5390     # Parameters
5391     self.instance_name = instance_name
5392     self.live = live
5393     self.cleanup = cleanup
5394
5395   def CheckPrereq(self):
5396     """Check prerequisites.
5397
5398     This checks that the instance is in the cluster.
5399
5400     """
5401     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5402     instance = self.cfg.GetInstanceInfo(instance_name)
5403     assert instance is not None
5404
5405     if instance.disk_template != constants.DT_DRBD8:
5406       raise errors.OpPrereqError("Instance's disk layout is not"
5407                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5408
5409     secondary_nodes = instance.secondary_nodes
5410     if not secondary_nodes:
5411       raise errors.ConfigurationError("No secondary node but using"
5412                                       " drbd8 disk template")
5413
5414     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5415
5416     target_node = secondary_nodes[0]
5417     # check memory requirements on the secondary node
5418     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5419                          instance.name, i_be[constants.BE_MEMORY],
5420                          instance.hypervisor)
5421
5422     # check bridge existance
5423     _CheckInstanceBridgesExist(self, instance, node=target_node)
5424
5425     if not self.cleanup:
5426       _CheckNodeNotDrained(self, target_node)
5427       result = self.rpc.call_instance_migratable(instance.primary_node,
5428                                                  instance)
5429       result.Raise("Can't migrate, please use failover",
5430                    prereq=True, ecode=errors.ECODE_STATE)
5431
5432     self.instance = instance
5433
5434   def _WaitUntilSync(self):
5435     """Poll with custom rpc for disk sync.
5436
5437     This uses our own step-based rpc call.
5438
5439     """
5440     self.feedback_fn("* wait until resync is done")
5441     all_done = False
5442     while not all_done:
5443       all_done = True
5444       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5445                                             self.nodes_ip,
5446                                             self.instance.disks)
5447       min_percent = 100
5448       for node, nres in result.items():
5449         nres.Raise("Cannot resync disks on node %s" % node)
5450         node_done, node_percent = nres.payload
5451         all_done = all_done and node_done
5452         if node_percent is not None:
5453           min_percent = min(min_percent, node_percent)
5454       if not all_done:
5455         if min_percent < 100:
5456           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5457         time.sleep(2)
5458
5459   def _EnsureSecondary(self, node):
5460     """Demote a node to secondary.
5461
5462     """
5463     self.feedback_fn("* switching node %s to secondary mode" % node)
5464
5465     for dev in self.instance.disks:
5466       self.cfg.SetDiskID(dev, node)
5467
5468     result = self.rpc.call_blockdev_close(node, self.instance.name,
5469                                           self.instance.disks)
5470     result.Raise("Cannot change disk to secondary on node %s" % node)
5471
5472   def _GoStandalone(self):
5473     """Disconnect from the network.
5474
5475     """
5476     self.feedback_fn("* changing into standalone mode")
5477     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5478                                                self.instance.disks)
5479     for node, nres in result.items():
5480       nres.Raise("Cannot disconnect disks node %s" % node)
5481
5482   def _GoReconnect(self, multimaster):
5483     """Reconnect to the network.
5484
5485     """
5486     if multimaster:
5487       msg = "dual-master"
5488     else:
5489       msg = "single-master"
5490     self.feedback_fn("* changing disks into %s mode" % msg)
5491     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5492                                            self.instance.disks,
5493                                            self.instance.name, multimaster)
5494     for node, nres in result.items():
5495       nres.Raise("Cannot change disks config on node %s" % node)
5496
5497   def _ExecCleanup(self):
5498     """Try to cleanup after a failed migration.
5499
5500     The cleanup is done by:
5501       - check that the instance is running only on one node
5502         (and update the config if needed)
5503       - change disks on its secondary node to secondary
5504       - wait until disks are fully synchronized
5505       - disconnect from the network
5506       - change disks into single-master mode
5507       - wait again until disks are fully synchronized
5508
5509     """
5510     instance = self.instance
5511     target_node = self.target_node
5512     source_node = self.source_node
5513
5514     # check running on only one node
5515     self.feedback_fn("* checking where the instance actually runs"
5516                      " (if this hangs, the hypervisor might be in"
5517                      " a bad state)")
5518     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5519     for node, result in ins_l.items():
5520       result.Raise("Can't contact node %s" % node)
5521
5522     runningon_source = instance.name in ins_l[source_node].payload
5523     runningon_target = instance.name in ins_l[target_node].payload
5524
5525     if runningon_source and runningon_target:
5526       raise errors.OpExecError("Instance seems to be running on two nodes,"
5527                                " or the hypervisor is confused. You will have"
5528                                " to ensure manually that it runs only on one"
5529                                " and restart this operation.")
5530
5531     if not (runningon_source or runningon_target):
5532       raise errors.OpExecError("Instance does not seem to be running at all."
5533                                " In this case, it's safer to repair by"
5534                                " running 'gnt-instance stop' to ensure disk"
5535                                " shutdown, and then restarting it.")
5536
5537     if runningon_target:
5538       # the migration has actually succeeded, we need to update the config
5539       self.feedback_fn("* instance running on secondary node (%s),"
5540                        " updating config" % target_node)
5541       instance.primary_node = target_node
5542       self.cfg.Update(instance, self.feedback_fn)
5543       demoted_node = source_node
5544     else:
5545       self.feedback_fn("* instance confirmed to be running on its"
5546                        " primary node (%s)" % source_node)
5547       demoted_node = target_node
5548
5549     self._EnsureSecondary(demoted_node)
5550     try:
5551       self._WaitUntilSync()
5552     except errors.OpExecError:
5553       # we ignore here errors, since if the device is standalone, it
5554       # won't be able to sync
5555       pass
5556     self._GoStandalone()
5557     self._GoReconnect(False)
5558     self._WaitUntilSync()
5559
5560     self.feedback_fn("* done")
5561
5562   def _RevertDiskStatus(self):
5563     """Try to revert the disk status after a failed migration.
5564
5565     """
5566     target_node = self.target_node
5567     try:
5568       self._EnsureSecondary(target_node)
5569       self._GoStandalone()
5570       self._GoReconnect(False)
5571       self._WaitUntilSync()
5572     except errors.OpExecError, err:
5573       self.lu.LogWarning("Migration failed and I can't reconnect the"
5574                          " drives: error '%s'\n"
5575                          "Please look and recover the instance status" %
5576                          str(err))
5577
5578   def _AbortMigration(self):
5579     """Call the hypervisor code to abort a started migration.
5580
5581     """
5582     instance = self.instance
5583     target_node = self.target_node
5584     migration_info = self.migration_info
5585
5586     abort_result = self.rpc.call_finalize_migration(target_node,
5587                                                     instance,
5588                                                     migration_info,
5589                                                     False)
5590     abort_msg = abort_result.fail_msg
5591     if abort_msg:
5592       logging.error("Aborting migration failed on target node %s: %s",
5593                     target_node, abort_msg)
5594       # Don't raise an exception here, as we stil have to try to revert the
5595       # disk status, even if this step failed.
5596
5597   def _ExecMigration(self):
5598     """Migrate an instance.
5599
5600     The migrate is done by:
5601       - change the disks into dual-master mode
5602       - wait until disks are fully synchronized again
5603       - migrate the instance
5604       - change disks on the new secondary node (the old primary) to secondary
5605       - wait until disks are fully synchronized
5606       - change disks into single-master mode
5607
5608     """
5609     instance = self.instance
5610     target_node = self.target_node
5611     source_node = self.source_node
5612
5613     self.feedback_fn("* checking disk consistency between source and target")
5614     for dev in instance.disks:
5615       if not _CheckDiskConsistency(self, dev, target_node, False):
5616         raise errors.OpExecError("Disk %s is degraded or not fully"
5617                                  " synchronized on target node,"
5618                                  " aborting migrate." % dev.iv_name)
5619
5620     # First get the migration information from the remote node
5621     result = self.rpc.call_migration_info(source_node, instance)
5622     msg = result.fail_msg
5623     if msg:
5624       log_err = ("Failed fetching source migration information from %s: %s" %
5625                  (source_node, msg))
5626       logging.error(log_err)
5627       raise errors.OpExecError(log_err)
5628
5629     self.migration_info = migration_info = result.payload
5630
5631     # Then switch the disks to master/master mode
5632     self._EnsureSecondary(target_node)
5633     self._GoStandalone()
5634     self._GoReconnect(True)
5635     self._WaitUntilSync()
5636
5637     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5638     result = self.rpc.call_accept_instance(target_node,
5639                                            instance,
5640                                            migration_info,
5641                                            self.nodes_ip[target_node])
5642
5643     msg = result.fail_msg
5644     if msg:
5645       logging.error("Instance pre-migration failed, trying to revert"
5646                     " disk status: %s", msg)
5647       self.feedback_fn("Pre-migration failed, aborting")
5648       self._AbortMigration()
5649       self._RevertDiskStatus()
5650       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5651                                (instance.name, msg))
5652
5653     self.feedback_fn("* migrating instance to %s" % target_node)
5654     time.sleep(10)
5655     result = self.rpc.call_instance_migrate(source_node, instance,
5656                                             self.nodes_ip[target_node],
5657                                             self.live)
5658     msg = result.fail_msg
5659     if msg:
5660       logging.error("Instance migration failed, trying to revert"
5661                     " disk status: %s", msg)
5662       self.feedback_fn("Migration failed, aborting")
5663       self._AbortMigration()
5664       self._RevertDiskStatus()
5665       raise errors.OpExecError("Could not migrate instance %s: %s" %
5666                                (instance.name, msg))
5667     time.sleep(10)
5668
5669     instance.primary_node = target_node
5670     # distribute new instance config to the other nodes
5671     self.cfg.Update(instance, self.feedback_fn)
5672
5673     result = self.rpc.call_finalize_migration(target_node,
5674                                               instance,
5675                                               migration_info,
5676                                               True)
5677     msg = result.fail_msg
5678     if msg:
5679       logging.error("Instance migration succeeded, but finalization failed:"
5680                     " %s", msg)
5681       raise errors.OpExecError("Could not finalize instance migration: %s" %
5682                                msg)
5683
5684     self._EnsureSecondary(source_node)
5685     self._WaitUntilSync()
5686     self._GoStandalone()
5687     self._GoReconnect(False)
5688     self._WaitUntilSync()
5689
5690     self.feedback_fn("* done")
5691
5692   def Exec(self, feedback_fn):
5693     """Perform the migration.
5694
5695     """
5696     feedback_fn("Migrating instance %s" % self.instance.name)
5697
5698     self.feedback_fn = feedback_fn
5699
5700     self.source_node = self.instance.primary_node
5701     self.target_node = self.instance.secondary_nodes[0]
5702     self.all_nodes = [self.source_node, self.target_node]
5703     self.nodes_ip = {
5704       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5705       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5706       }
5707
5708     if self.cleanup:
5709       return self._ExecCleanup()
5710     else:
5711       return self._ExecMigration()
5712
5713
5714 def _CreateBlockDev(lu, node, instance, device, force_create,
5715                     info, force_open):
5716   """Create a tree of block devices on a given node.
5717
5718   If this device type has to be created on secondaries, create it and
5719   all its children.
5720
5721   If not, just recurse to children keeping the same 'force' value.
5722
5723   @param lu: the lu on whose behalf we execute
5724   @param node: the node on which to create the device
5725   @type instance: L{objects.Instance}
5726   @param instance: the instance which owns the device
5727   @type device: L{objects.Disk}
5728   @param device: the device to create
5729   @type force_create: boolean
5730   @param force_create: whether to force creation of this device; this
5731       will be change to True whenever we find a device which has
5732       CreateOnSecondary() attribute
5733   @param info: the extra 'metadata' we should attach to the device
5734       (this will be represented as a LVM tag)
5735   @type force_open: boolean
5736   @param force_open: this parameter will be passes to the
5737       L{backend.BlockdevCreate} function where it specifies
5738       whether we run on primary or not, and it affects both
5739       the child assembly and the device own Open() execution
5740
5741   """
5742   if device.CreateOnSecondary():
5743     force_create = True
5744
5745   if device.children:
5746     for child in device.children:
5747       _CreateBlockDev(lu, node, instance, child, force_create,
5748                       info, force_open)
5749
5750   if not force_create:
5751     return
5752
5753   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5754
5755
5756 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5757   """Create a single block device on a given node.
5758
5759   This will not recurse over children of the device, so they must be
5760   created in advance.
5761
5762   @param lu: the lu on whose behalf we execute
5763   @param node: the node on which to create the device
5764   @type instance: L{objects.Instance}
5765   @param instance: the instance which owns the device
5766   @type device: L{objects.Disk}
5767   @param device: the device to create
5768   @param info: the extra 'metadata' we should attach to the device
5769       (this will be represented as a LVM tag)
5770   @type force_open: boolean
5771   @param force_open: this parameter will be passes to the
5772       L{backend.BlockdevCreate} function where it specifies
5773       whether we run on primary or not, and it affects both
5774       the child assembly and the device own Open() execution
5775
5776   """
5777   lu.cfg.SetDiskID(device, node)
5778   result = lu.rpc.call_blockdev_create(node, device, device.size,
5779                                        instance.name, force_open, info)
5780   result.Raise("Can't create block device %s on"
5781                " node %s for instance %s" % (device, node, instance.name))
5782   if device.physical_id is None:
5783     device.physical_id = result.payload
5784
5785
5786 def _GenerateUniqueNames(lu, exts):
5787   """Generate a suitable LV name.
5788
5789   This will generate a logical volume name for the given instance.
5790
5791   """
5792   results = []
5793   for val in exts:
5794     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5795     results.append("%s%s" % (new_id, val))
5796   return results
5797
5798
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration for
  the new device.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first element of the drbd logical id
  @param secondary: second element of the drbd logical id
  @param size: size of the drbd device and of its data volume
  @param names: sequence whose first two items are the LV names of
      the data and the metadata volume, respectively
  @param iv_name: the iv_name of the drbd device
  @param p_minor: minor stored in the logical id after the port
  @param s_minor: minor stored in the logical id after C{p_minor}
  @rtype: L{objects.Disk}
  @return: the drbd8 device, with the data and meta LVs as children

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  shared_secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  backing = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vgname, names[0])),
    # the metadata volume always has the fixed size of 128
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vgname, names[1])),
    ]
  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id, children=backing,
                      iv_name=iv_name)
5818
5819
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the list of disk objects for a given disk template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template (one of the
      C{constants.DT_*} values handled below)
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: the primary node of the drbd8 devices
  @param secondary_nodes: must be empty for the plain and file
      templates and hold exactly one node for drbd8
  @param disk_info: list of dicts; their "size" and "mode" entries
      are used
  @param file_storage_dir: directory part of the file disks' paths
  @param file_driver: first element of the file disks' logical id
  @param base_index: index of the first generated disk, used for the
      names and the iv_name of the disks
  @rtype: list
  @return: the generated L{objects.Disk} objects
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                         for offset in range(num_disks)])
    return [objects.Disk(dev_type=constants.LD_LV, size=params["size"],
                         logical_id=(vgname, lv_names[offset]),
                         iv_name="disk/%d" % (offset + base_index),
                         mode=params["mode"])
            for offset, params in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                         for offset in range(num_disks)])
    drbd_disks = []
    for offset, params in enumerate(disk_info):
      lv_pair = [prefixes[offset] + "_data", prefixes[offset] + "_meta"]
      branch = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                    params["size"], lv_pair,
                                    "disk/%d" % (offset + base_index),
                                    minors[offset * 2],
                                    minors[offset * 2 + 1])
      branch.mode = params["mode"]
      drbd_disks.append(branch)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    file_disks = []
    for offset, params in enumerate(disk_info):
      number = offset + base_index
      path = "%s/disk%d" % (file_storage_dir, number)
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=params["size"],
                                     iv_name="disk/%d" % number,
                                     logical_id=(file_driver, path),
                                     mode=params["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5886
5887
5888 def _GetInstanceInfoText(instance):
5889   """Compute that text that should be added to the disk's metadata.
5890
5891   """
5892   return "originstname+%s" % instance.name
5893
5894
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all the disks of an instance.

  For file-based instances the directory holding the first disk is
  created beforehand on the primary (or overriding) node. Afterwards
  each disk that is not skipped is created on every involved node,
  forcing creation and opening only on that primary node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: None; a failure to create the storage directory is reported
      through the C{Raise} method of the RPC result

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    first_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_path)
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5938
5939
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the directory holding the first disk is
  removed as well, on the target node if given and on the primary node
  otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: remember the failure but keep removing the rest
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5987
5988
def _ComputeDiskSize(disk_template, disks):
  """Compute the space a set of disks requires in the volume group.

  @param disk_template: the disk template of the instance
  @param disks: iterable of dicts, each having a "size" entry
  @return: the required size for the plain and drbd8 templates, None
      for the diskless and file ones
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_size = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_size = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
  }

  if disk_template in requirements:
    return requirements[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
6007
6008
6009 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6010   """Hypervisor parameter validation.
6011
6012   This function abstract the hypervisor parameter validation to be
6013   used in both instance create and instance modify.
6014
6015   @type lu: L{LogicalUnit}
6016   @param lu: the logical unit for which we check
6017   @type nodenames: list
6018   @param nodenames: the list of nodes on which we should check
6019   @type hvname: string
6020   @param hvname: the name of the hypervisor we should use
6021   @type hvparams: dict
6022   @param hvparams: the parameters which we need to check
6023   @raise errors.OpPrereqError: if the parameters are not valid
6024
6025   """
6026   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6027                                                   hvname,
6028                                                   hvparams)
6029   for node in nodenames:
6030     info = hvinfo[node]
6031     if info.offline:
6032       continue
6033     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6034
6035
6036 class LUCreateInstance(LogicalUnit):
6037   """Create an instance.
6038
6039   """
6040   HPATH = "instance-add"
6041   HTYPE = constants.HTYPE_INSTANCE
6042   _OP_REQP = ["instance_name", "disks",
6043               "mode", "start",
6044               "wait_for_sync", "ip_check", "nics",
6045               "hvparams", "beparams"]
6046   REQ_BGL = False
6047
6048   def CheckArguments(self):
6049     """Check arguments.
6050
6051     """
6052     # set optional parameters to none if they don't exist
6053     for attr in ["pnode", "snode", "iallocator", "hypervisor",
6054                  "disk_template", "identify_defaults"]:
6055       if not hasattr(self.op, attr):
6056         setattr(self.op, attr, None)
6057
6058     # do not require name_check to ease forward/backward compatibility
6059     # for tools
6060     if not hasattr(self.op, "name_check"):
6061       self.op.name_check = True
6062     if not hasattr(self.op, "no_install"):
6063       self.op.no_install = False
6064     if self.op.no_install and self.op.start:
6065       self.LogInfo("No-installation mode selected, disabling startup")
6066       self.op.start = False
6067     # validate/normalize the instance name
6068     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6069     if self.op.ip_check and not self.op.name_check:
6070       # TODO: make the ip check more flexible and not depend on the name check
6071       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6072                                  errors.ECODE_INVAL)
6073     # check disk information: either all adopt, or no adopt
6074     has_adopt = has_no_adopt = False
6075     for disk in self.op.disks:
6076       if "adopt" in disk:
6077         has_adopt = True
6078       else:
6079         has_no_adopt = True
6080     if has_adopt and has_no_adopt:
6081       raise errors.OpPrereqError("Either all disks are adopted or none is",
6082                                  errors.ECODE_INVAL)
6083     if has_adopt:
6084       if self.op.disk_template != constants.DT_PLAIN:
6085         raise errors.OpPrereqError("Disk adoption is only supported for the"
6086                                    " 'plain' disk template",
6087                                    errors.ECODE_INVAL)
6088       if self.op.iallocator is not None:
6089         raise errors.OpPrereqError("Disk adoption not allowed with an"
6090                                    " iallocator script", errors.ECODE_INVAL)
6091       if self.op.mode == constants.INSTANCE_IMPORT:
6092         raise errors.OpPrereqError("Disk adoption not allowed for"
6093                                    " instance import", errors.ECODE_INVAL)
6094
6095     self.adopt_disks = has_adopt
6096
6097     # verify creation mode
6098     if self.op.mode not in (constants.INSTANCE_CREATE,
6099                             constants.INSTANCE_IMPORT):
6100       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6101                                  self.op.mode, errors.ECODE_INVAL)
6102
6103     # instance name verification
6104     if self.op.name_check:
6105       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6106       self.op.instance_name = self.hostname1.name
6107       # used in CheckPrereq for ip ping check
6108       self.check_ip = self.hostname1.ip
6109     else:
6110       self.check_ip = None
6111
6112     # file storage checks
6113     if (self.op.file_driver and
6114         not self.op.file_driver in constants.FILE_DRIVER):
6115       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6116                                  self.op.file_driver, errors.ECODE_INVAL)
6117
6118     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6119       raise errors.OpPrereqError("File storage directory path not absolute",
6120                                  errors.ECODE_INVAL)
6121
6122     ### Node/iallocator related checks
6123     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6124       raise errors.OpPrereqError("One and only one of iallocator and primary"
6125                                  " node must be given",
6126                                  errors.ECODE_INVAL)
6127
6128     if self.op.mode == constants.INSTANCE_IMPORT:
6129       # On import force_variant must be True, because if we forced it at
6130       # initial install, our only chance when importing it back is that it
6131       # works again!
6132       self.op.force_variant = True
6133
6134       if self.op.no_install:
6135         self.LogInfo("No-installation mode has no effect during import")
6136
6137     else: # INSTANCE_CREATE
6138       if getattr(self.op, "os_type", None) is None:
6139         raise errors.OpPrereqError("No guest OS specified",
6140                                    errors.ECODE_INVAL)
6141       self.op.force_variant = getattr(self.op, "force_variant", False)
6142       if self.op.disk_template is None:
6143         raise errors.OpPrereqError("No disk template specified",
6144                                    errors.ECODE_INVAL)
6145
6146   def ExpandNames(self):
6147     """ExpandNames for CreateInstance.
6148
6149     Figure out the right locks for instance creation.
6150
6151     """
6152     self.needed_locks = {}
6153
6154     instance_name = self.op.instance_name
6155     # this is just a preventive check, but someone might still add this
6156     # instance in the meantime, and creation will fail at lock-add time
6157     if instance_name in self.cfg.GetInstanceList():
6158       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6159                                  instance_name, errors.ECODE_EXISTS)
6160
6161     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6162
6163     if self.op.iallocator:
6164       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6165     else:
6166       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6167       nodelist = [self.op.pnode]
6168       if self.op.snode is not None:
6169         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6170         nodelist.append(self.op.snode)
6171       self.needed_locks[locking.LEVEL_NODE] = nodelist
6172
6173     # in case of import lock the source node too
6174     if self.op.mode == constants.INSTANCE_IMPORT:
6175       src_node = getattr(self.op, "src_node", None)
6176       src_path = getattr(self.op, "src_path", None)
6177
6178       if src_path is None:
6179         self.op.src_path = src_path = self.op.instance_name
6180
6181       if src_node is None:
6182         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6183         self.op.src_node = None
6184         if os.path.isabs(src_path):
6185           raise errors.OpPrereqError("Importing an instance from an absolute"
6186                                      " path requires a source node option.",
6187                                      errors.ECODE_INVAL)
6188       else:
6189         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6190         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6191           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6192         if not os.path.isabs(src_path):
6193           self.op.src_path = src_path = \
6194             utils.PathJoin(constants.EXPORT_DIR, src_path)
6195
6196   def _RunAllocator(self):
6197     """Run the allocator based on input opcode.
6198
6199     """
6200     nics = [n.ToDict() for n in self.nics]
6201     ial = IAllocator(self.cfg, self.rpc,
6202                      mode=constants.IALLOCATOR_MODE_ALLOC,
6203                      name=self.op.instance_name,
6204                      disk_template=self.op.disk_template,
6205                      tags=[],
6206                      os=self.op.os_type,
6207                      vcpus=self.be_full[constants.BE_VCPUS],
6208                      mem_size=self.be_full[constants.BE_MEMORY],
6209                      disks=self.disks,
6210                      nics=nics,
6211                      hypervisor=self.op.hypervisor,
6212                      )
6213
6214     ial.Run(self.op.iallocator)
6215
6216     if not ial.success:
6217       raise errors.OpPrereqError("Can't compute nodes using"
6218                                  " iallocator '%s': %s" %
6219                                  (self.op.iallocator, ial.info),
6220                                  errors.ECODE_NORES)
6221     if len(ial.result) != ial.required_nodes:
6222       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6223                                  " of nodes (%s), required %s" %
6224                                  (self.op.iallocator, len(ial.result),
6225                                   ial.required_nodes), errors.ECODE_FAULT)
6226     self.op.pnode = ial.result[0]
6227     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6228                  self.op.instance_name, self.op.iallocator,
6229                  utils.CommaJoin(ial.result))
6230     if ial.required_nodes == 2:
6231       self.op.snode = ial.result[1]
6232
6233   def BuildHooksEnv(self):
6234     """Build hooks env.
6235
6236     This runs on master, primary and secondary nodes of the instance.
6237
6238     """
6239     env = {
6240       "ADD_MODE": self.op.mode,
6241       }
6242     if self.op.mode == constants.INSTANCE_IMPORT:
6243       env["SRC_NODE"] = self.op.src_node
6244       env["SRC_PATH"] = self.op.src_path
6245       env["SRC_IMAGES"] = self.src_images
6246
6247     env.update(_BuildInstanceHookEnv(
6248       name=self.op.instance_name,
6249       primary_node=self.op.pnode,
6250       secondary_nodes=self.secondaries,
6251       status=self.op.start,
6252       os_type=self.op.os_type,
6253       memory=self.be_full[constants.BE_MEMORY],
6254       vcpus=self.be_full[constants.BE_VCPUS],
6255       nics=_NICListToTuple(self, self.nics),
6256       disk_template=self.op.disk_template,
6257       disks=[(d["size"], d["mode"]) for d in self.disks],
6258       bep=self.be_full,
6259       hvp=self.hv_full,
6260       hypervisor_name=self.op.hypervisor,
6261     ))
6262
6263     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6264           self.secondaries)
6265     return env, nl, nl
6266
6267   def _ReadExportInfo(self):
6268     """Reads the export information from disk.
6269
6270     It will override the opcode source node and path with the actual
6271     information, if these two were not specified before.
6272
6273     @return: the export information
6274
6275     """
6276     assert self.op.mode == constants.INSTANCE_IMPORT
6277
6278     src_node = self.op.src_node
6279     src_path = self.op.src_path
6280
6281     if src_node is None:
6282       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6283       exp_list = self.rpc.call_export_list(locked_nodes)
6284       found = False
6285       for node in exp_list:
6286         if exp_list[node].fail_msg:
6287           continue
6288         if src_path in exp_list[node].payload:
6289           found = True
6290           self.op.src_node = src_node = node
6291           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6292                                                        src_path)
6293           break
6294       if not found:
6295         raise errors.OpPrereqError("No export found for relative path %s" %
6296                                     src_path, errors.ECODE_INVAL)
6297
6298     _CheckNodeOnline(self, src_node)
6299     result = self.rpc.call_export_info(src_node, src_path)
6300     result.Raise("No export or invalid export found in dir %s" % src_path)
6301
6302     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6303     if not export_info.has_section(constants.INISECT_EXP):
6304       raise errors.ProgrammerError("Corrupted export config",
6305                                    errors.ECODE_ENVIRON)
6306
6307     ei_version = export_info.get(constants.INISECT_EXP, "version")
6308     if (int(ei_version) != constants.EXPORT_VERSION):
6309       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6310                                  (ei_version, constants.EXPORT_VERSION),
6311                                  errors.ECODE_ENVIRON)
6312     return export_info
6313
6314   def _ReadExportParams(self, einfo):
6315     """Use export parameters as defaults.
6316
6317     In case the opcode doesn't specify (as in override) some instance
6318     parameters, then try to use them from the export information, if
6319     that declares them.
6320
6321     """
6322     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6323
6324     if self.op.disk_template is None:
6325       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6326         self.op.disk_template = einfo.get(constants.INISECT_INS,
6327                                           "disk_template")
6328       else:
6329         raise errors.OpPrereqError("No disk template specified and the export"
6330                                    " is missing the disk_template information",
6331                                    errors.ECODE_INVAL)
6332
6333     if not self.op.disks:
6334       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6335         disks = []
6336         # TODO: import the disk iv_name too
6337         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6338           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6339           disks.append({"size": disk_sz})
6340         self.op.disks = disks
6341       else:
6342         raise errors.OpPrereqError("No disk info specified and the export"
6343                                    " is missing the disk information",
6344                                    errors.ECODE_INVAL)
6345
6346     if (not self.op.nics and
6347         einfo.has_option(constants.INISECT_INS, "nic_count")):
6348       nics = []
6349       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6350         ndict = {}
6351         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6352           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6353           ndict[name] = v
6354         nics.append(ndict)
6355       self.op.nics = nics
6356
6357     if (self.op.hypervisor is None and
6358         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6359       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6360     if einfo.has_section(constants.INISECT_HYP):
6361       # use the export parameters but do not override the ones
6362       # specified by the user
6363       for name, value in einfo.items(constants.INISECT_HYP):
6364         if name not in self.op.hvparams:
6365           self.op.hvparams[name] = value
6366
6367     if einfo.has_section(constants.INISECT_BEP):
6368       # use the parameters, without overriding
6369       for name, value in einfo.items(constants.INISECT_BEP):
6370         if name not in self.op.beparams:
6371           self.op.beparams[name] = value
6372     else:
6373       # try to read the parameters old style, from the main section
6374       for name in constants.BES_PARAMETERS:
6375         if (name not in self.op.beparams and
6376             einfo.has_option(constants.INISECT_INS, name)):
6377           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6378
6379   def _RevertToDefaults(self, cluster):
6380     """Revert the instance parameters to the default values.
6381
6382     """
6383     # hvparams
6384     hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6385     for name in self.op.hvparams.keys():
6386       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6387         del self.op.hvparams[name]
6388     # beparams
6389     be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6390     for name in self.op.beparams.keys():
6391       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6392         del self.op.beparams[name]
6393     # nic params
6394     nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6395     for nic in self.op.nics:
6396       for name in constants.NICS_PARAMETERS:
6397         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6398           del nic[name]
6399
6400   def CheckPrereq(self):
6401     """Check prerequisites.
6402
6403     """
6404     if self.op.mode == constants.INSTANCE_IMPORT:
6405       export_info = self._ReadExportInfo()
6406       self._ReadExportParams(export_info)
6407
6408     _CheckDiskTemplate(self.op.disk_template)
6409
6410     if (not self.cfg.GetVGName() and
6411         self.op.disk_template not in constants.DTS_NOT_LVM):
6412       raise errors.OpPrereqError("Cluster does not support lvm-based"
6413                                  " instances", errors.ECODE_STATE)
6414
6415     if self.op.hypervisor is None:
6416       self.op.hypervisor = self.cfg.GetHypervisorType()
6417
6418     cluster = self.cfg.GetClusterInfo()
6419     enabled_hvs = cluster.enabled_hypervisors
6420     if self.op.hypervisor not in enabled_hvs:
6421       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6422                                  " cluster (%s)" % (self.op.hypervisor,
6423                                   ",".join(enabled_hvs)),
6424                                  errors.ECODE_STATE)
6425
6426     # check hypervisor parameter syntax (locally)
6427     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6428     filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6429                                                         self.op.os_type),
6430                                   self.op.hvparams)
6431     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6432     hv_type.CheckParameterSyntax(filled_hvp)
6433     self.hv_full = filled_hvp
6434     # check that we don't specify global parameters on an instance
6435     _CheckGlobalHvParams(self.op.hvparams)
6436
6437     # fill and remember the beparams dict
6438     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6439     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6440                                     self.op.beparams)
6441
6442     # now that hvp/bep are in final format, let's reset to defaults,
6443     # if told to do so
6444     if self.op.identify_defaults:
6445       self._RevertToDefaults(cluster)
6446
6447     # NIC buildup
6448     self.nics = []
6449     for idx, nic in enumerate(self.op.nics):
6450       nic_mode_req = nic.get("mode", None)
6451       nic_mode = nic_mode_req
6452       if nic_mode is None:
6453         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6454
6455       # in routed mode, for the first nic, the default ip is 'auto'
6456       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6457         default_ip_mode = constants.VALUE_AUTO
6458       else:
6459         default_ip_mode = constants.VALUE_NONE
6460
6461       # ip validity checks
6462       ip = nic.get("ip", default_ip_mode)
6463       if ip is None or ip.lower() == constants.VALUE_NONE:
6464         nic_ip = None
6465       elif ip.lower() == constants.VALUE_AUTO:
6466         if not self.op.name_check:
6467           raise errors.OpPrereqError("IP address set to auto but name checks"
6468                                      " have been skipped. Aborting.",
6469                                      errors.ECODE_INVAL)
6470         nic_ip = self.hostname1.ip
6471       else:
6472         if not utils.IsValidIP(ip):
6473           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6474                                      " like a valid IP" % ip,
6475                                      errors.ECODE_INVAL)
6476         nic_ip = ip
6477
6478       # TODO: check the ip address for uniqueness
6479       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6480         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6481                                    errors.ECODE_INVAL)
6482
6483       # MAC address verification
6484       mac = nic.get("mac", constants.VALUE_AUTO)
6485       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6486         mac = utils.NormalizeAndValidateMac(mac)
6487
6488         try:
6489           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6490         except errors.ReservationError:
6491           raise errors.OpPrereqError("MAC address %s already in use"
6492                                      " in cluster" % mac,
6493                                      errors.ECODE_NOTUNIQUE)
6494
6495       # bridge verification
6496       bridge = nic.get("bridge", None)
6497       link = nic.get("link", None)
6498       if bridge and link:
6499         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6500                                    " at the same time", errors.ECODE_INVAL)
6501       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6502         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6503                                    errors.ECODE_INVAL)
6504       elif bridge:
6505         link = bridge
6506
6507       nicparams = {}
6508       if nic_mode_req:
6509         nicparams[constants.NIC_MODE] = nic_mode_req
6510       if link:
6511         nicparams[constants.NIC_LINK] = link
6512
6513       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6514                                       nicparams)
6515       objects.NIC.CheckParameterSyntax(check_params)
6516       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6517
6518     # disk checks/pre-build
6519     self.disks = []
6520     for disk in self.op.disks:
6521       mode = disk.get("mode", constants.DISK_RDWR)
6522       if mode not in constants.DISK_ACCESS_SET:
6523         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6524                                    mode, errors.ECODE_INVAL)
6525       size = disk.get("size", None)
6526       if size is None:
6527         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6528       try:
6529         size = int(size)
6530       except (TypeError, ValueError):
6531         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6532                                    errors.ECODE_INVAL)
6533       new_disk = {"size": size, "mode": mode}
6534       if "adopt" in disk:
6535         new_disk["adopt"] = disk["adopt"]
6536       self.disks.append(new_disk)
6537
6538     if self.op.mode == constants.INSTANCE_IMPORT:
6539
6540       # Check that the new instance doesn't have less disks than the export
6541       instance_disks = len(self.disks)
6542       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6543       if instance_disks < export_disks:
6544         raise errors.OpPrereqError("Not enough disks to import."
6545                                    " (instance: %d, export: %d)" %
6546                                    (instance_disks, export_disks),
6547                                    errors.ECODE_INVAL)
6548
6549       disk_images = []
6550       for idx in range(export_disks):
6551         option = 'disk%d_dump' % idx
6552         if export_info.has_option(constants.INISECT_INS, option):
6553           # FIXME: are the old os-es, disk sizes, etc. useful?
6554           export_name = export_info.get(constants.INISECT_INS, option)
6555           image = utils.PathJoin(self.op.src_path, export_name)
6556           disk_images.append(image)
6557         else:
6558           disk_images.append(False)
6559
6560       self.src_images = disk_images
6561
6562       old_name = export_info.get(constants.INISECT_INS, 'name')
6563       try:
6564         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6565       except (TypeError, ValueError), err:
6566         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6567                                    " an integer: %s" % str(err),
6568                                    errors.ECODE_STATE)
6569       if self.op.instance_name == old_name:
6570         for idx, nic in enumerate(self.nics):
6571           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6572             nic_mac_ini = 'nic%d_mac' % idx
6573             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6574
6575     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6576
6577     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6578     if self.op.ip_check:
6579       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6580         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6581                                    (self.check_ip, self.op.instance_name),
6582                                    errors.ECODE_NOTUNIQUE)
6583
6584     #### mac address generation
6585     # By generating here the mac address both the allocator and the hooks get
6586     # the real final mac address rather than the 'auto' or 'generate' value.
6587     # There is a race condition between the generation and the instance object
6588     # creation, which means that we know the mac is valid now, but we're not
6589     # sure it will be when we actually add the instance. If things go bad
6590     # adding the instance will abort because of a duplicate mac, and the
6591     # creation job will fail.
6592     for nic in self.nics:
6593       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6594         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6595
6596     #### allocator run
6597
6598     if self.op.iallocator is not None:
6599       self._RunAllocator()
6600
6601     #### node related checks
6602
6603     # check primary node
6604     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6605     assert self.pnode is not None, \
6606       "Cannot retrieve locked node %s" % self.op.pnode
6607     if pnode.offline:
6608       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6609                                  pnode.name, errors.ECODE_STATE)
6610     if pnode.drained:
6611       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6612                                  pnode.name, errors.ECODE_STATE)
6613
6614     self.secondaries = []
6615
6616     # mirror node verification
6617     if self.op.disk_template in constants.DTS_NET_MIRROR:
6618       if self.op.snode is None:
6619         raise errors.OpPrereqError("The networked disk templates need"
6620                                    " a mirror node", errors.ECODE_INVAL)
6621       if self.op.snode == pnode.name:
6622         raise errors.OpPrereqError("The secondary node cannot be the"
6623                                    " primary node.", errors.ECODE_INVAL)
6624       _CheckNodeOnline(self, self.op.snode)
6625       _CheckNodeNotDrained(self, self.op.snode)
6626       self.secondaries.append(self.op.snode)
6627
6628     nodenames = [pnode.name] + self.secondaries
6629
6630     req_size = _ComputeDiskSize(self.op.disk_template,
6631                                 self.disks)
6632
6633     # Check lv size requirements, if not adopting
6634     if req_size is not None and not self.adopt_disks:
6635       _CheckNodesFreeDisk(self, nodenames, req_size)
6636
6637     if self.adopt_disks: # instead, we must check the adoption data
6638       all_lvs = set([i["adopt"] for i in self.disks])
6639       if len(all_lvs) != len(self.disks):
6640         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6641                                    errors.ECODE_INVAL)
6642       for lv_name in all_lvs:
6643         try:
6644           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6645         except errors.ReservationError:
6646           raise errors.OpPrereqError("LV named %s used by another instance" %
6647                                      lv_name, errors.ECODE_NOTUNIQUE)
6648
6649       node_lvs = self.rpc.call_lv_list([pnode.name],
6650                                        self.cfg.GetVGName())[pnode.name]
6651       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6652       node_lvs = node_lvs.payload
6653       delta = all_lvs.difference(node_lvs.keys())
6654       if delta:
6655         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6656                                    utils.CommaJoin(delta),
6657                                    errors.ECODE_INVAL)
6658       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6659       if online_lvs:
6660         raise errors.OpPrereqError("Online logical volumes found, cannot"
6661                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6662                                    errors.ECODE_STATE)
6663       # update the size of disk based on what is found
6664       for dsk in self.disks:
6665         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6666
6667     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6668
6669     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6670
6671     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6672
6673     # memory check on primary node
6674     if self.op.start:
6675       _CheckNodeFreeMemory(self, self.pnode.name,
6676                            "creating instance %s" % self.op.instance_name,
6677                            self.be_full[constants.BE_MEMORY],
6678                            self.op.hypervisor)
6679
6680     self.dry_run_result = list(nodenames)
6681
6682   def Exec(self, feedback_fn):
6683     """Create and add the instance to the cluster.
6684
6685     """
6686     instance = self.op.instance_name
6687     pnode_name = self.pnode.name
6688
6689     ht_kind = self.op.hypervisor
6690     if ht_kind in constants.HTS_REQ_PORT:
6691       network_port = self.cfg.AllocatePort()
6692     else:
6693       network_port = None
6694
6695     if constants.ENABLE_FILE_STORAGE:
6696       # this is needed because os.path.join does not accept None arguments
6697       if self.op.file_storage_dir is None:
6698         string_file_storage_dir = ""
6699       else:
6700         string_file_storage_dir = self.op.file_storage_dir
6701
6702       # build the full file storage dir path
6703       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6704                                         string_file_storage_dir, instance)
6705     else:
6706       file_storage_dir = ""
6707
6708
6709     disks = _GenerateDiskTemplate(self,
6710                                   self.op.disk_template,
6711                                   instance, pnode_name,
6712                                   self.secondaries,
6713                                   self.disks,
6714                                   file_storage_dir,
6715                                   self.op.file_driver,
6716                                   0)
6717
6718     iobj = objects.Instance(name=instance, os=self.op.os_type,
6719                             primary_node=pnode_name,
6720                             nics=self.nics, disks=disks,
6721                             disk_template=self.op.disk_template,
6722                             admin_up=False,
6723                             network_port=network_port,
6724                             beparams=self.op.beparams,
6725                             hvparams=self.op.hvparams,
6726                             hypervisor=self.op.hypervisor,
6727                             )
6728
6729     if self.adopt_disks:
6730       # rename LVs to the newly-generated names; we need to construct
6731       # 'fake' LV disks with the old data, plus the new unique_id
6732       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6733       rename_to = []
6734       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6735         rename_to.append(t_dsk.logical_id)
6736         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6737         self.cfg.SetDiskID(t_dsk, pnode_name)
6738       result = self.rpc.call_blockdev_rename(pnode_name,
6739                                              zip(tmp_disks, rename_to))
6740       result.Raise("Failed to rename adoped LVs")
6741     else:
6742       feedback_fn("* creating instance disks...")
6743       try:
6744         _CreateDisks(self, iobj)
6745       except errors.OpExecError:
6746         self.LogWarning("Device creation failed, reverting...")
6747         try:
6748           _RemoveDisks(self, iobj)
6749         finally:
6750           self.cfg.ReleaseDRBDMinors(instance)
6751           raise
6752
6753     feedback_fn("adding instance %s to cluster config" % instance)
6754
6755     self.cfg.AddInstance(iobj, self.proc.GetECId())
6756
6757     # Declare that we don't want to remove the instance lock anymore, as we've
6758     # added the instance to the config
6759     del self.remove_locks[locking.LEVEL_INSTANCE]
6760     # Unlock all the nodes
6761     if self.op.mode == constants.INSTANCE_IMPORT:
6762       nodes_keep = [self.op.src_node]
6763       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6764                        if node != self.op.src_node]
6765       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6766       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6767     else:
6768       self.context.glm.release(locking.LEVEL_NODE)
6769       del self.acquired_locks[locking.LEVEL_NODE]
6770
6771     if self.op.wait_for_sync:
6772       disk_abort = not _WaitForSync(self, iobj)
6773     elif iobj.disk_template in constants.DTS_NET_MIRROR:
6774       # make sure the disks are not degraded (still sync-ing is ok)
6775       time.sleep(15)
6776       feedback_fn("* checking mirrors status")
6777       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6778     else:
6779       disk_abort = False
6780
6781     if disk_abort:
6782       _RemoveDisks(self, iobj)
6783       self.cfg.RemoveInstance(iobj.name)
6784       # Make sure the instance lock gets removed
6785       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6786       raise errors.OpExecError("There are some degraded disks for"
6787                                " this instance")
6788
6789     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6790       if self.op.mode == constants.INSTANCE_CREATE:
6791         if not self.op.no_install:
6792           feedback_fn("* running the instance OS create scripts...")
6793           # FIXME: pass debug option from opcode to backend
6794           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6795                                                  self.op.debug_level)
6796           result.Raise("Could not add os for instance %s"
6797                        " on node %s" % (instance, pnode_name))
6798
6799       elif self.op.mode == constants.INSTANCE_IMPORT:
6800         feedback_fn("* running the instance OS import scripts...")
6801         src_node = self.op.src_node
6802         src_images = self.src_images
6803         cluster_name = self.cfg.GetClusterName()
6804         # FIXME: pass debug option from opcode to backend
6805         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6806                                                          src_node, src_images,
6807                                                          cluster_name,
6808                                                          self.op.debug_level)
6809         msg = import_result.fail_msg
6810         if msg:
6811           self.LogWarning("Error while importing the disk images for instance"
6812                           " %s on node %s: %s" % (instance, pnode_name, msg))
6813       else:
6814         # also checked in the prereq part
6815         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6816                                      % self.op.mode)
6817
6818     if self.op.start:
6819       iobj.admin_up = True
6820       self.cfg.Update(iobj, feedback_fn)
6821       logging.info("Starting instance %s on node %s", instance, pnode_name)
6822       feedback_fn("* starting instance...")
6823       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6824       result.Raise("Could not start instance")
6825
6826     return list(iobj.all_nodes)
6827
6828
class LUConnectConsole(NoHooksLU):
  """Compute the command needed to reach an instance's console.

  Unlike most logical units this one does not connect anywhere by
  itself: its result is the command line which has to be run on the
  master node in order to get to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and its primary
    node must pass the online check.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Build the console command for the instance.

    @param feedback_fn: not used by this LU
    @return: the ssh command line for connecting to the console
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it knows for this hypervisor
    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # the filled hvparams/beparams are handed over separately so that the
    # instance object itself is not edited (and the defaults not saved
    # into it)
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    shell_cmd = hv_class.GetShellCommandForConsole(inst, filled_hv, filled_be)

    # wrap the console command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", shell_cmd, batch=True, tty=True)
6880
6881
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of a single instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in missing optional opcode fields and validate them.

    """
    optional_fields = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for (field, default) in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    op = self.op
    if op.iallocator is not None:
      # the new secondary is not known yet, hence all nodes are locked
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      new_secondary = _ExpandNodeName(self.cfg, op.remote_node)
      op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks at the node level.

    Nothing is done when all the nodes are already going to be locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    # not locking the whole node set, so the instance's own nodes have to
    # be declared explicitly
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node, the primary node of the
    instance and, if one was given, the new secondary node; the same
    list is returned for both node list slots.

    """
    inst = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
6956
6957
class LUEvacuateNode(LogicalUnit):
  """Logical unit moving the secondary instances away from a node.

  One L{TLReplaceDisks} tasklet, in "change secondary" mode, is created
  for each instance having the node as secondary.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in missing optional opcode fields and validate them.

    """
    optional_fields = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for (field, default) in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create one tasklet per instance.

    @raise errors.OpPrereqError: if neither an iallocator nor a new
        secondary node was given

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk replacement tasklet for every instance which uses this
    # node as its secondary
    inst_names = []
    replacers = []

    for sec_inst in _GetNodeSecondaryInstances(self.cfg, op.node_name):
      logging.debug("Replacing disks for instance %s", sec_inst.name)
      inst_names.append(sec_inst.name)
      replacers.append(TLReplaceDisks(self, sec_inst.name,
                                      constants.REPLACE_DISK_CHG,
                                      op.iallocator, op.remote_node, [],
                                      True, op.early_release))

    self.tasklets = replacers
    self.instance_names = inst_names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks at the node level.

    Nothing is done when all the nodes are already going to be locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    # not locking the whole node set, so the instances' own nodes have to
    # be declared explicitly
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node and, if one was given, the
    new secondary node; the same list is returned for both node list
    slots.

    """
    env = {"NODE_NAME": self.op.node_name}
    hook_nodes = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    return (env, hook_nodes, hook_nodes)
7045
7046
7047 class TLReplaceDisks(Tasklet):
7048   """Replaces disks for an instance.
7049
7050   Note: Locking is not within the scope of this class.
7051
7052   """
7053   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7054                disks, delay_iallocator, early_release):
7055     """Initializes this class.
7056
7057     """
7058     Tasklet.__init__(self, lu)
7059
7060     # Parameters
7061     self.instance_name = instance_name
7062     self.mode = mode
7063     self.iallocator_name = iallocator_name
7064     self.remote_node = remote_node
7065     self.disks = disks
7066     self.delay_iallocator = delay_iallocator
7067     self.early_release = early_release
7068
7069     # Runtime data
7070     self.instance = None
7071     self.new_node = None
7072     self.target_node = None
7073     self.other_node = None
7074     self.remote_node_info = None
7075     self.node_secondary_ip = None
7076
7077   @staticmethod
7078   def CheckArguments(mode, remote_node, iallocator):
7079     """Helper function for users of this class.
7080
7081     """
7082     # check for valid parameter combination
7083     if mode == constants.REPLACE_DISK_CHG:
7084       if remote_node is None and iallocator is None:
7085         raise errors.OpPrereqError("When changing the secondary either an"
7086                                    " iallocator script must be used or the"
7087                                    " new node given", errors.ECODE_INVAL)
7088
7089       if remote_node is not None and iallocator is not None:
7090         raise errors.OpPrereqError("Give either the iallocator or the new"
7091                                    " secondary, not both", errors.ECODE_INVAL)
7092
7093     elif remote_node is not None or iallocator is not None:
7094       # Not replacing the secondary
7095       raise errors.OpPrereqError("The iallocator and new node options can"
7096                                  " only be used when changing the"
7097                                  " secondary node", errors.ECODE_INVAL)
7098
7099   @staticmethod
7100   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7101     """Compute a new secondary node using an IAllocator.
7102
7103     """
7104     ial = IAllocator(lu.cfg, lu.rpc,
7105                      mode=constants.IALLOCATOR_MODE_RELOC,
7106                      name=instance_name,
7107                      relocate_from=relocate_from)
7108
7109     ial.Run(iallocator_name)
7110
7111     if not ial.success:
7112       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7113                                  " %s" % (iallocator_name, ial.info),
7114                                  errors.ECODE_NORES)
7115
7116     if len(ial.result) != ial.required_nodes:
7117       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7118                                  " of nodes (%s), required %s" %
7119                                  (iallocator_name,
7120                                   len(ial.result), ial.required_nodes),
7121                                  errors.ECODE_FAULT)
7122
7123     remote_node_name = ial.result[0]
7124
7125     lu.LogInfo("Selected new secondary for instance '%s': %s",
7126                instance_name, remote_node_name)
7127
7128     return remote_node_name
7129
7130   def _FindFaultyDisks(self, node_name):
7131     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7132                                     node_name, True)
7133
7134   def CheckPrereq(self):
7135     """Check prerequisites.
7136
7137     This checks that the instance is in the cluster.
7138
7139     """
7140     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7141     assert instance is not None, \
7142       "Cannot retrieve locked instance %s" % self.instance_name
7143
7144     if instance.disk_template != constants.DT_DRBD8:
7145       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7146                                  " instances", errors.ECODE_INVAL)
7147
7148     if len(instance.secondary_nodes) != 1:
7149       raise errors.OpPrereqError("The instance has a strange layout,"
7150                                  " expected one secondary but found %d" %
7151                                  len(instance.secondary_nodes),
7152                                  errors.ECODE_FAULT)
7153
7154     if not self.delay_iallocator:
7155       self._CheckPrereq2()
7156
7157   def _CheckPrereq2(self):
7158     """Check prerequisites, second part.
7159
7160     This function should always be part of CheckPrereq. It was separated and is
7161     now called from Exec because during node evacuation iallocator was only
7162     called with an unmodified cluster model, not taking planned changes into
7163     account.
7164
7165     """
7166     instance = self.instance
7167     secondary_node = instance.secondary_nodes[0]
7168
7169     if self.iallocator_name is None:
7170       remote_node = self.remote_node
7171     else:
7172       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7173                                        instance.name, instance.secondary_nodes)
7174
7175     if remote_node is not None:
7176       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7177       assert self.remote_node_info is not None, \
7178         "Cannot retrieve locked node %s" % remote_node
7179     else:
7180       self.remote_node_info = None
7181
7182     if remote_node == self.instance.primary_node:
7183       raise errors.OpPrereqError("The specified node is the primary node of"
7184                                  " the instance.", errors.ECODE_INVAL)
7185
7186     if remote_node == secondary_node:
7187       raise errors.OpPrereqError("The specified node is already the"
7188                                  " secondary node of the instance.",
7189                                  errors.ECODE_INVAL)
7190
7191     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7192                                     constants.REPLACE_DISK_CHG):
7193       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7194                                  errors.ECODE_INVAL)
7195
7196     if self.mode == constants.REPLACE_DISK_AUTO:
7197       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7198       faulty_secondary = self._FindFaultyDisks(secondary_node)
7199
7200       if faulty_primary and faulty_secondary:
7201         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7202                                    " one node and can not be repaired"
7203                                    " automatically" % self.instance_name,
7204                                    errors.ECODE_STATE)
7205
7206       if faulty_primary:
7207         self.disks = faulty_primary
7208         self.target_node = instance.primary_node
7209         self.other_node = secondary_node
7210         check_nodes = [self.target_node, self.other_node]
7211       elif faulty_secondary:
7212         self.disks = faulty_secondary
7213         self.target_node = secondary_node
7214         self.other_node = instance.primary_node
7215         check_nodes = [self.target_node, self.other_node]
7216       else:
7217         self.disks = []
7218         check_nodes = []
7219
7220     else:
7221       # Non-automatic modes
7222       if self.mode == constants.REPLACE_DISK_PRI:
7223         self.target_node = instance.primary_node
7224         self.other_node = secondary_node
7225         check_nodes = [self.target_node, self.other_node]
7226
7227       elif self.mode == constants.REPLACE_DISK_SEC:
7228         self.target_node = secondary_node
7229         self.other_node = instance.primary_node
7230         check_nodes = [self.target_node, self.other_node]
7231
7232       elif self.mode == constants.REPLACE_DISK_CHG:
7233         self.new_node = remote_node
7234         self.other_node = instance.primary_node
7235         self.target_node = secondary_node
7236         check_nodes = [self.new_node, self.other_node]
7237
7238         _CheckNodeNotDrained(self.lu, remote_node)
7239
7240         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7241         assert old_node_info is not None
7242         if old_node_info.offline and not self.early_release:
7243           # doesn't make sense to delay the release
7244           self.early_release = True
7245           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7246                           " early-release mode", secondary_node)
7247
7248       else:
7249         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7250                                      self.mode)
7251
7252       # If not specified all disks should be replaced
7253       if not self.disks:
7254         self.disks = range(len(self.instance.disks))
7255
7256     for node in check_nodes:
7257       _CheckNodeOnline(self.lu, node)
7258
7259     # Check whether disks are valid
7260     for disk_idx in self.disks:
7261       instance.FindDisk(disk_idx)
7262
7263     # Get secondary node IP addresses
7264     node_2nd_ip = {}
7265
7266     for node_name in [self.target_node, self.other_node, self.new_node]:
7267       if node_name is not None:
7268         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7269
7270     self.node_secondary_ip = node_2nd_ip
7271
7272   def Exec(self, feedback_fn):
7273     """Execute disk replacement.
7274
7275     This dispatches the disk replacement to the appropriate handler.
7276
7277     """
7278     if self.delay_iallocator:
7279       self._CheckPrereq2()
7280
7281     if not self.disks:
7282       feedback_fn("No disks need replacement")
7283       return
7284
7285     feedback_fn("Replacing disk(s) %s for %s" %
7286                 (utils.CommaJoin(self.disks), self.instance.name))
7287
7288     activate_disks = (not self.instance.admin_up)
7289
7290     # Activate the instance disks if we're replacing them on a down instance
7291     if activate_disks:
7292       _StartInstanceDisks(self.lu, self.instance, True)
7293
7294     try:
7295       # Should we replace the secondary node?
7296       if self.new_node is not None:
7297         fn = self._ExecDrbd8Secondary
7298       else:
7299         fn = self._ExecDrbd8DiskOnly
7300
7301       return fn(feedback_fn)
7302
7303     finally:
7304       # Deactivate the instance disks if we're replacing them on a
7305       # down instance
7306       if activate_disks:
7307         _SafeShutdownInstanceDisks(self.lu, self.instance)
7308
7309   def _CheckVolumeGroup(self, nodes):
7310     self.lu.LogInfo("Checking volume groups")
7311
7312     vgname = self.cfg.GetVGName()
7313
7314     # Make sure volume group exists on all involved nodes
7315     results = self.rpc.call_vg_list(nodes)
7316     if not results:
7317       raise errors.OpExecError("Can't list volume groups on the nodes")
7318
7319     for node in nodes:
7320       res = results[node]
7321       res.Raise("Error checking node %s" % node)
7322       if vgname not in res.payload:
7323         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7324                                  (vgname, node))
7325
7326   def _CheckDisksExistence(self, nodes):
7327     # Check disk existence
7328     for idx, dev in enumerate(self.instance.disks):
7329       if idx not in self.disks:
7330         continue
7331
7332       for node in nodes:
7333         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7334         self.cfg.SetDiskID(dev, node)
7335
7336         result = self.rpc.call_blockdev_find(node, dev)
7337
7338         msg = result.fail_msg
7339         if msg or not result.payload:
7340           if not msg:
7341             msg = "disk not found"
7342           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7343                                    (idx, node, msg))
7344
7345   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7346     for idx, dev in enumerate(self.instance.disks):
7347       if idx not in self.disks:
7348         continue
7349
7350       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7351                       (idx, node_name))
7352
7353       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7354                                    ldisk=ldisk):
7355         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7356                                  " replace disks for instance %s" %
7357                                  (node_name, self.instance.name))
7358
7359   def _CreateNewStorage(self, node_name):
7360     vgname = self.cfg.GetVGName()
7361     iv_names = {}
7362
7363     for idx, dev in enumerate(self.instance.disks):
7364       if idx not in self.disks:
7365         continue
7366
7367       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7368
7369       self.cfg.SetDiskID(dev, node_name)
7370
7371       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7372       names = _GenerateUniqueNames(self.lu, lv_names)
7373
7374       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7375                              logical_id=(vgname, names[0]))
7376       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7377                              logical_id=(vgname, names[1]))
7378
7379       new_lvs = [lv_data, lv_meta]
7380       old_lvs = dev.children
7381       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7382
7383       # we pass force_create=True to force the LVM creation
7384       for new_lv in new_lvs:
7385         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7386                         _GetInstanceInfoText(self.instance), False)
7387
7388     return iv_names
7389
7390   def _CheckDevices(self, node_name, iv_names):
7391     for name, (dev, _, _) in iv_names.iteritems():
7392       self.cfg.SetDiskID(dev, node_name)
7393
7394       result = self.rpc.call_blockdev_find(node_name, dev)
7395
7396       msg = result.fail_msg
7397       if msg or not result.payload:
7398         if not msg:
7399           msg = "disk not found"
7400         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7401                                  (name, msg))
7402
7403       if result.payload.is_degraded:
7404         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7405
7406   def _RemoveOldStorage(self, node_name, iv_names):
7407     for name, (_, old_lvs, _) in iv_names.iteritems():
7408       self.lu.LogInfo("Remove logical volumes for %s" % name)
7409
7410       for lv in old_lvs:
7411         self.cfg.SetDiskID(lv, node_name)
7412
7413         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7414         if msg:
7415           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7416                              hint="remove unused LVs manually")
7417
7418   def _ReleaseNodeLock(self, node_name):
7419     """Releases the lock for a given node."""
7420     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7421
7422   def _ExecDrbd8DiskOnly(self, feedback_fn):
7423     """Replace a disk on the primary or secondary for DRBD 8.
7424
7425     The algorithm for replace is quite complicated:
7426
7427       1. for each disk to be replaced:
7428
7429         1. create new LVs on the target node with unique names
7430         1. detach old LVs from the drbd device
7431         1. rename old LVs to name_replaced.<time_t>
7432         1. rename new LVs to old LVs
7433         1. attach the new LVs (with the old names now) to the drbd device
7434
7435       1. wait for sync across all devices
7436
7437       1. for each modified disk:
7438
7439         1. remove old LVs (which have the name name_replaces.<time_t>)
7440
7441     Failures are not very well handled.
7442
7443     """
7444     steps_total = 6
7445
7446     # Step: check device activation
7447     self.lu.LogStep(1, steps_total, "Check device existence")
7448     self._CheckDisksExistence([self.other_node, self.target_node])
7449     self._CheckVolumeGroup([self.target_node, self.other_node])
7450
7451     # Step: check other node consistency
7452     self.lu.LogStep(2, steps_total, "Check peer consistency")
7453     self._CheckDisksConsistency(self.other_node,
7454                                 self.other_node == self.instance.primary_node,
7455                                 False)
7456
7457     # Step: create new storage
7458     self.lu.LogStep(3, steps_total, "Allocate new storage")
7459     iv_names = self._CreateNewStorage(self.target_node)
7460
7461     # Step: for each lv, detach+rename*2+attach
7462     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7463     for dev, old_lvs, new_lvs in iv_names.itervalues():
7464       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7465
7466       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7467                                                      old_lvs)
7468       result.Raise("Can't detach drbd from local storage on node"
7469                    " %s for device %s" % (self.target_node, dev.iv_name))
7470       #dev.children = []
7471       #cfg.Update(instance)
7472
7473       # ok, we created the new LVs, so now we know we have the needed
7474       # storage; as such, we proceed on the target node to rename
7475       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7476       # using the assumption that logical_id == physical_id (which in
7477       # turn is the unique_id on that node)
7478
7479       # FIXME(iustin): use a better name for the replaced LVs
7480       temp_suffix = int(time.time())
7481       ren_fn = lambda d, suff: (d.physical_id[0],
7482                                 d.physical_id[1] + "_replaced-%s" % suff)
7483
7484       # Build the rename list based on what LVs exist on the node
7485       rename_old_to_new = []
7486       for to_ren in old_lvs:
7487         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7488         if not result.fail_msg and result.payload:
7489           # device exists
7490           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7491
7492       self.lu.LogInfo("Renaming the old LVs on the target node")
7493       result = self.rpc.call_blockdev_rename(self.target_node,
7494                                              rename_old_to_new)
7495       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7496
7497       # Now we rename the new LVs to the old LVs
7498       self.lu.LogInfo("Renaming the new LVs on the target node")
7499       rename_new_to_old = [(new, old.physical_id)
7500                            for old, new in zip(old_lvs, new_lvs)]
7501       result = self.rpc.call_blockdev_rename(self.target_node,
7502                                              rename_new_to_old)
7503       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7504
7505       for old, new in zip(old_lvs, new_lvs):
7506         new.logical_id = old.logical_id
7507         self.cfg.SetDiskID(new, self.target_node)
7508
7509       for disk in old_lvs:
7510         disk.logical_id = ren_fn(disk, temp_suffix)
7511         self.cfg.SetDiskID(disk, self.target_node)
7512
7513       # Now that the new lvs have the old name, we can add them to the device
7514       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7515       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7516                                                   new_lvs)
7517       msg = result.fail_msg
7518       if msg:
7519         for new_lv in new_lvs:
7520           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7521                                                new_lv).fail_msg
7522           if msg2:
7523             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7524                                hint=("cleanup manually the unused logical"
7525                                      "volumes"))
7526         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7527
7528       dev.children = new_lvs
7529
7530       self.cfg.Update(self.instance, feedback_fn)
7531
7532     cstep = 5
7533     if self.early_release:
7534       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7535       cstep += 1
7536       self._RemoveOldStorage(self.target_node, iv_names)
7537       # WARNING: we release both node locks here, do not do other RPCs
7538       # than WaitForSync to the primary node
7539       self._ReleaseNodeLock([self.target_node, self.other_node])
7540
7541     # Wait for sync
7542     # This can fail as the old devices are degraded and _WaitForSync
7543     # does a combined result over all disks, so we don't check its return value
7544     self.lu.LogStep(cstep, steps_total, "Sync devices")
7545     cstep += 1
7546     _WaitForSync(self.lu, self.instance)
7547
7548     # Check all devices manually
7549     self._CheckDevices(self.instance.primary_node, iv_names)
7550
7551     # Step: remove old storage
7552     if not self.early_release:
7553       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7554       cstep += 1
7555       self._RemoveOldStorage(self.target_node, iv_names)
7556
7557   def _ExecDrbd8Secondary(self, feedback_fn):
7558     """Replace the secondary node for DRBD 8.
7559
7560     The algorithm for replace is quite complicated:
7561       - for all disks of the instance:
7562         - create new LVs on the new node with same names
7563         - shutdown the drbd device on the old secondary
7564         - disconnect the drbd network on the primary
7565         - create the drbd device on the new secondary
7566         - network attach the drbd on the primary, using an artifice:
7567           the drbd code for Attach() will connect to the network if it
7568           finds a device which is connected to the good local disks but
7569           not network enabled
7570       - wait for sync across all devices
7571       - remove all disks from the old secondary
7572
7573     Failures are not very well handled.
7574
7575     """
7576     steps_total = 6
7577
7578     # Step: check device activation
7579     self.lu.LogStep(1, steps_total, "Check device existence")
7580     self._CheckDisksExistence([self.instance.primary_node])
7581     self._CheckVolumeGroup([self.instance.primary_node])
7582
7583     # Step: check other node consistency
7584     self.lu.LogStep(2, steps_total, "Check peer consistency")
7585     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7586
7587     # Step: create new storage
7588     self.lu.LogStep(3, steps_total, "Allocate new storage")
7589     for idx, dev in enumerate(self.instance.disks):
7590       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7591                       (self.new_node, idx))
7592       # we pass force_create=True to force LVM creation
7593       for new_lv in dev.children:
7594         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7595                         _GetInstanceInfoText(self.instance), False)
7596
7597     # Step 4: dbrd minors and drbd setups changes
7598     # after this, we must manually remove the drbd minors on both the
7599     # error and the success paths
7600     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7601     minors = self.cfg.AllocateDRBDMinor([self.new_node
7602                                          for dev in self.instance.disks],
7603                                         self.instance.name)
7604     logging.debug("Allocated minors %r", minors)
7605
7606     iv_names = {}
7607     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7608       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7609                       (self.new_node, idx))
7610       # create new devices on new_node; note that we create two IDs:
7611       # one without port, so the drbd will be activated without
7612       # networking information on the new node at this stage, and one
7613       # with network, for the latter activation in step 4
7614       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7615       if self.instance.primary_node == o_node1:
7616         p_minor = o_minor1
7617       else:
7618         assert self.instance.primary_node == o_node2, "Three-node instance?"
7619         p_minor = o_minor2
7620
7621       new_alone_id = (self.instance.primary_node, self.new_node, None,
7622                       p_minor, new_minor, o_secret)
7623       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7624                     p_minor, new_minor, o_secret)
7625
7626       iv_names[idx] = (dev, dev.children, new_net_id)
7627       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7628                     new_net_id)
7629       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7630                               logical_id=new_alone_id,
7631                               children=dev.children,
7632                               size=dev.size)
7633       try:
7634         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7635                               _GetInstanceInfoText(self.instance), False)
7636       except errors.GenericError:
7637         self.cfg.ReleaseDRBDMinors(self.instance.name)
7638         raise
7639
7640     # We have new devices, shutdown the drbd on the old secondary
7641     for idx, dev in enumerate(self.instance.disks):
7642       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7643       self.cfg.SetDiskID(dev, self.target_node)
7644       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7645       if msg:
7646         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7647                            "node: %s" % (idx, msg),
7648                            hint=("Please cleanup this device manually as"
7649                                  " soon as possible"))
7650
7651     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7652     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7653                                                self.node_secondary_ip,
7654                                                self.instance.disks)\
7655                                               [self.instance.primary_node]
7656
7657     msg = result.fail_msg
7658     if msg:
7659       # detaches didn't succeed (unlikely)
7660       self.cfg.ReleaseDRBDMinors(self.instance.name)
7661       raise errors.OpExecError("Can't detach the disks from the network on"
7662                                " old node: %s" % (msg,))
7663
7664     # if we managed to detach at least one, we update all the disks of
7665     # the instance to point to the new secondary
7666     self.lu.LogInfo("Updating instance configuration")
7667     for dev, _, new_logical_id in iv_names.itervalues():
7668       dev.logical_id = new_logical_id
7669       self.cfg.SetDiskID(dev, self.instance.primary_node)
7670
7671     self.cfg.Update(self.instance, feedback_fn)
7672
7673     # and now perform the drbd attach
7674     self.lu.LogInfo("Attaching primary drbds to new secondary"
7675                     " (standalone => connected)")
7676     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7677                                             self.new_node],
7678                                            self.node_secondary_ip,
7679                                            self.instance.disks,
7680                                            self.instance.name,
7681                                            False)
7682     for to_node, to_result in result.items():
7683       msg = to_result.fail_msg
7684       if msg:
7685         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7686                            to_node, msg,
7687                            hint=("please do a gnt-instance info to see the"
7688                                  " status of disks"))
7689     cstep = 5
7690     if self.early_release:
7691       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7692       cstep += 1
7693       self._RemoveOldStorage(self.target_node, iv_names)
7694       # WARNING: we release all node locks here, do not do other RPCs
7695       # than WaitForSync to the primary node
7696       self._ReleaseNodeLock([self.instance.primary_node,
7697                              self.target_node,
7698                              self.new_node])
7699
7700     # Wait for sync
7701     # This can fail as the old devices are degraded and _WaitForSync
7702     # does a combined result over all disks, so we don't check its return value
7703     self.lu.LogStep(cstep, steps_total, "Sync devices")
7704     cstep += 1
7705     _WaitForSync(self.lu, self.instance)
7706
7707     # Check all devices manually
7708     self._CheckDevices(self.instance.primary_node, iv_names)
7709
7710     # Step: remove old storage
7711     if not self.early_release:
7712       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7713       self._RemoveOldStorage(self.target_node, iv_names)
7714
7715
7716 class LURepairNodeStorage(NoHooksLU):
7717   """Repairs the volume group on a node.
7718
7719   """
7720   _OP_REQP = ["node_name"]
7721   REQ_BGL = False
7722
7723   def CheckArguments(self):
7724     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7725
7726     _CheckStorageType(self.op.storage_type)
7727
7728   def ExpandNames(self):
7729     self.needed_locks = {
7730       locking.LEVEL_NODE: [self.op.node_name],
7731       }
7732
7733   def _CheckFaultyDisks(self, instance, node_name):
7734     """Ensure faulty disks abort the opcode or at least warn."""
7735     try:
7736       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7737                                   node_name, True):
7738         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7739                                    " node '%s'" % (instance.name, node_name),
7740                                    errors.ECODE_STATE)
7741     except errors.OpPrereqError, err:
7742       if self.op.ignore_consistency:
7743         self.proc.LogWarning(str(err.args[0]))
7744       else:
7745         raise
7746
7747   def CheckPrereq(self):
7748     """Check prerequisites.
7749
7750     """
7751     storage_type = self.op.storage_type
7752
7753     if (constants.SO_FIX_CONSISTENCY not in
7754         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7755       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7756                                  " repaired" % storage_type,
7757                                  errors.ECODE_INVAL)
7758
7759     # Check whether any instance on this node has faulty disks
7760     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7761       if not inst.admin_up:
7762         continue
7763       check_nodes = set(inst.all_nodes)
7764       check_nodes.discard(self.op.node_name)
7765       for inst_node_name in check_nodes:
7766         self._CheckFaultyDisks(inst, inst_node_name)
7767
7768   def Exec(self, feedback_fn):
7769     feedback_fn("Repairing storage unit '%s' on %s ..." %
7770                 (self.op.name, self.op.node_name))
7771
7772     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7773     result = self.rpc.call_storage_execute(self.op.node_name,
7774                                            self.op.storage_type, st_args,
7775                                            self.op.name,
7776                                            constants.SO_FIX_CONSISTENCY)
7777     result.Raise("Failed to repair storage unit '%s' on %s" %
7778                  (self.op.name, self.op.node_name))
7779
7780
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  Either a fixed new secondary node (C{remote_node}) or an iallocator
  (C{iallocator}) is used to decide where the secondary instances of the
  given nodes should go.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional parameters and reject conflicting ones.

    @raise errors.OpPrereqError: if both a remote node and an iallocator
        are given

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and declare the node locks.

    Without a fixed remote node all node locks are requested, otherwise
    only those of the evacuated nodes and of the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: function used to send feedback back to the caller
        (not used here)
    @return: with a fixed remote node, a list of C{[instance_name,
        remote_node]} pairs, one per secondary instance of the evacuated
        nodes; otherwise the result computed by the iallocator
    @raise errors.OpPrereqError: if the remote node is the primary node
        of one of the affected instances
    @raise errors.OpExecError: if the iallocator finds no solution

    """
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        # Execution errors carry only a message; error codes are used
        # with OpPrereqError only
        raise errors.OpExecError("No valid evacuation solution: %s" %
                                 ial.info)
      result = ial.result
    return result
7833
7834
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later on.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of the environment dictionary and the node list,
        the latter given twice

    """
    env = {}
    env["DISK"] = self.op.disk
    env["AMOUNT"] = self.op.amount
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    master = self.cfg.GetMasterNode()
    nl = [master] + list(self.instance.all_nodes)
    return (env, nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and that the disk
    exists; except for file-based instances the free disk space on the
    nodes is checked as well.

    @raise errors.OpPrereqError: if the disk template does not support
        growing

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    nodenames = list(inst.all_nodes)
    for node_name in nodenames:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    # TODO: check the free disk space for file, when that feature will be
    # supported
    if inst.disk_template == constants.DT_FILE:
      return

    _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is grown on every node of the instance, then the new size is
    recorded in the configuration; optionally the sync is waited for.

    @param feedback_fn: function used to send feedback back to the caller

    """
    inst = self.instance
    grown_disk = self.disk
    amount = self.op.amount

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(grown_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, grown_disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    grown_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if not self.op.wait_for_sync:
      return

    sync_ok = _WaitForSync(self, inst)
    if not sync_ok:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7919
7920
class LUQueryInstanceData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  When the opcode's C{static} flag is set, only configuration data is
  returned and no RPC calls are made to the nodes.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the locks needed for the requested instances.

    All locks are declared as shared. With an empty instance list all
    instances are locked; node locks are recalculated later.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = [_ExpandInstanceName(self.cfg, short_name)
                           for short_name in self.op.instances]
      instance_locks = self.wanted_names
    else:
      # no explicit list given: the names are filled in by CheckPrereq
      self.wanted_names = None
      instance_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the list of wanted instance names (falling back to the
    acquired instance locks when none were given) into instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(iname) for iname in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Return the status of a block device on a given node.

    @return: None in static mode, when no node is given, when the node is
        offline or when the node returned no status; otherwise a tuple
        (dev_path, major, minor, sync_percent, estimated_time,
        is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_res = self.rpc.call_blockdev_find(node, dev)
    if find_res.offline:
      return None

    find_res.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_res.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute the status dictionary of a disk and, recursively, its children.

    """
    pnode = instance.primary_node

    if dev.dev_type in constants.LDS_DRBD:
      # for DRBD devices the secondary is taken from the logical id: it is
      # whichever of its first two entries is not the primary node
      if dev.logical_id[0] == pnode:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    status_primary = self._ComputeBlockdevStatus(pnode, instance.name, dev)
    status_secondary = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    for child in (dev.children or []):
      children_status.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": status_primary,
      "sstatus": status_secondary,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data, as a dict keyed by instance name."""
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_res = self.rpc.call_instance_info(inst.primary_node,
                                               inst.name,
                                               inst.hypervisor)
        info_res.Raise("Error checking node %s" % inst.primary_node)
        runtime = info_res.payload
        if runtime and "state" in runtime:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        admin_state = "up"
      else:
        admin_state = "down"

      disk_status = [self._ComputeDiskStatus(inst, None, disk)
                     for disk in inst.disks]

      all_data[inst.name] = {
        "name": inst.name,
        "config_state": admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disks": disk_status,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return all_data
8076
8077
8078 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
8080
8081   """
8082   HPATH = "instance-modify"
8083   HTYPE = constants.HTYPE_INSTANCE
8084   _OP_REQP = ["instance_name"]
8085   REQ_BGL = False
8086
8087   def CheckArguments(self):
8088     if not hasattr(self.op, 'nics'):
8089       self.op.nics = []
8090     if not hasattr(self.op, 'disks'):
8091       self.op.disks = []
8092     if not hasattr(self.op, 'beparams'):
8093       self.op.beparams = {}
8094     if not hasattr(self.op, 'hvparams'):
8095       self.op.hvparams = {}
8096     if not hasattr(self.op, "disk_template"):
8097       self.op.disk_template = None
8098     if not hasattr(self.op, "remote_node"):
8099       self.op.remote_node = None
8100     if not hasattr(self.op, "os_name"):
8101       self.op.os_name = None
8102     if not hasattr(self.op, "force_variant"):
8103       self.op.force_variant = False
8104     self.op.force = getattr(self.op, "force", False)
8105     if not (self.op.nics or self.op.disks or self.op.disk_template or
8106             self.op.hvparams or self.op.beparams or self.op.os_name):
8107       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8108
8109     if self.op.hvparams:
8110       _CheckGlobalHvParams(self.op.hvparams)
8111
8112     # Disk validation
8113     disk_addremove = 0
8114     for disk_op, disk_dict in self.op.disks:
8115       if disk_op == constants.DDM_REMOVE:
8116         disk_addremove += 1
8117         continue
8118       elif disk_op == constants.DDM_ADD:
8119         disk_addremove += 1
8120       else:
8121         if not isinstance(disk_op, int):
8122           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8123         if not isinstance(disk_dict, dict):
8124           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8125           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8126
8127       if disk_op == constants.DDM_ADD:
8128         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8129         if mode not in constants.DISK_ACCESS_SET:
8130           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8131                                      errors.ECODE_INVAL)
8132         size = disk_dict.get('size', None)
8133         if size is None:
8134           raise errors.OpPrereqError("Required disk parameter size missing",
8135                                      errors.ECODE_INVAL)
8136         try:
8137           size = int(size)
8138         except (TypeError, ValueError), err:
8139           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8140                                      str(err), errors.ECODE_INVAL)
8141         disk_dict['size'] = size
8142       else:
8143         # modification of disk
8144         if 'size' in disk_dict:
8145           raise errors.OpPrereqError("Disk size change not possible, use"
8146                                      " grow-disk", errors.ECODE_INVAL)
8147
8148     if disk_addremove > 1:
8149       raise errors.OpPrereqError("Only one disk add or remove operation"
8150                                  " supported at a time", errors.ECODE_INVAL)
8151
8152     if self.op.disks and self.op.disk_template is not None:
8153       raise errors.OpPrereqError("Disk template conversion and other disk"
8154                                  " changes not supported at the same time",
8155                                  errors.ECODE_INVAL)
8156
8157     if self.op.disk_template:
8158       _CheckDiskTemplate(self.op.disk_template)
8159       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8160           self.op.remote_node is None):
8161         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8162                                    " one requires specifying a secondary node",
8163                                    errors.ECODE_INVAL)
8164
8165     # NIC validation
8166     nic_addremove = 0
8167     for nic_op, nic_dict in self.op.nics:
8168       if nic_op == constants.DDM_REMOVE:
8169         nic_addremove += 1
8170         continue
8171       elif nic_op == constants.DDM_ADD:
8172         nic_addremove += 1
8173       else:
8174         if not isinstance(nic_op, int):
8175           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8176         if not isinstance(nic_dict, dict):
8177           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8178           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8179
8180       # nic_dict should be a dict
8181       nic_ip = nic_dict.get('ip', None)
8182       if nic_ip is not None:
8183         if nic_ip.lower() == constants.VALUE_NONE:
8184           nic_dict['ip'] = None
8185         else:
8186           if not utils.IsValidIP(nic_ip):
8187             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8188                                        errors.ECODE_INVAL)
8189
8190       nic_bridge = nic_dict.get('bridge', None)
8191       nic_link = nic_dict.get('link', None)
8192       if nic_bridge and nic_link:
8193         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8194                                    " at the same time", errors.ECODE_INVAL)
8195       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8196         nic_dict['bridge'] = None
8197       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8198         nic_dict['link'] = None
8199
8200       if nic_op == constants.DDM_ADD:
8201         nic_mac = nic_dict.get('mac', None)
8202         if nic_mac is None:
8203           nic_dict['mac'] = constants.VALUE_AUTO
8204
8205       if 'mac' in nic_dict:
8206         nic_mac = nic_dict['mac']
8207         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8208           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8209
8210         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8211           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8212                                      " modifying an existing nic",
8213                                      errors.ECODE_INVAL)
8214
8215     if nic_addremove > 1:
8216       raise errors.OpPrereqError("Only one NIC add or remove operation"
8217                                  " supported at a time", errors.ECODE_INVAL)
8218
8219   def ExpandNames(self):
8220     self._ExpandAndLockInstance()
8221     self.needed_locks[locking.LEVEL_NODE] = []
8222     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8223
8224   def DeclareLocks(self, level):
8225     if level == locking.LEVEL_NODE:
8226       self._LockInstancesNodes()
8227       if self.op.disk_template and self.op.remote_node:
8228         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8229         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8230
8231   def BuildHooksEnv(self):
8232     """Build hooks env.
8233
8234     This runs on the master, primary and secondaries.
8235
8236     """
8237     args = dict()
8238     if constants.BE_MEMORY in self.be_new:
8239       args['memory'] = self.be_new[constants.BE_MEMORY]
8240     if constants.BE_VCPUS in self.be_new:
8241       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8242     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8243     # information at all.
8244     if self.op.nics:
8245       args['nics'] = []
8246       nic_override = dict(self.op.nics)
8247       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8248       for idx, nic in enumerate(self.instance.nics):
8249         if idx in nic_override:
8250           this_nic_override = nic_override[idx]
8251         else:
8252           this_nic_override = {}
8253         if 'ip' in this_nic_override:
8254           ip = this_nic_override['ip']
8255         else:
8256           ip = nic.ip
8257         if 'mac' in this_nic_override:
8258           mac = this_nic_override['mac']
8259         else:
8260           mac = nic.mac
8261         if idx in self.nic_pnew:
8262           nicparams = self.nic_pnew[idx]
8263         else:
8264           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8265         mode = nicparams[constants.NIC_MODE]
8266         link = nicparams[constants.NIC_LINK]
8267         args['nics'].append((ip, mac, mode, link))
8268       if constants.DDM_ADD in nic_override:
8269         ip = nic_override[constants.DDM_ADD].get('ip', None)
8270         mac = nic_override[constants.DDM_ADD]['mac']
8271         nicparams = self.nic_pnew[constants.DDM_ADD]
8272         mode = nicparams[constants.NIC_MODE]
8273         link = nicparams[constants.NIC_LINK]
8274         args['nics'].append((ip, mac, mode, link))
8275       elif constants.DDM_REMOVE in nic_override:
8276         del args['nics'][-1]
8277
8278     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8279     if self.op.disk_template:
8280       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8281     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8282     return env, nl, nl
8283
8284   @staticmethod
8285   def _GetUpdatedParams(old_params, update_dict,
8286                         default_values, parameter_types):
8287     """Return the new params dict for the given params.
8288
8289     @type old_params: dict
8290     @param old_params: old parameters
8291     @type update_dict: dict
8292     @param update_dict: dict containing new parameter values,
8293                         or constants.VALUE_DEFAULT to reset the
8294                         parameter to its default value
8295     @type default_values: dict
8296     @param default_values: default values for the filled parameters
8297     @type parameter_types: dict
8298     @param parameter_types: dict mapping target dict keys to types
8299                             in constants.ENFORCEABLE_TYPES
8300     @rtype: (dict, dict)
8301     @return: (new_parameters, filled_parameters)
8302
8303     """
8304     params_copy = copy.deepcopy(old_params)
8305     for key, val in update_dict.iteritems():
8306       if val == constants.VALUE_DEFAULT:
8307         try:
8308           del params_copy[key]
8309         except KeyError:
8310           pass
8311       else:
8312         params_copy[key] = val
8313     utils.ForceDictType(params_copy, parameter_types)
8314     params_filled = objects.FillDict(default_values, params_copy)
8315     return (params_copy, params_filled)
8316
8317   def CheckPrereq(self):
8318     """Check prerequisites.
8319
8320     This only checks the instance list against the existing names.
8321
8322     """
8323     self.force = self.op.force
8324
8325     # checking the new params on the primary/secondary nodes
8326
8327     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8328     cluster = self.cluster = self.cfg.GetClusterInfo()
8329     assert self.instance is not None, \
8330       "Cannot retrieve locked instance %s" % self.op.instance_name
8331     pnode = instance.primary_node
8332     nodelist = list(instance.all_nodes)
8333
8334     if self.op.disk_template:
8335       if instance.disk_template == self.op.disk_template:
8336         raise errors.OpPrereqError("Instance already has disk template %s" %
8337                                    instance.disk_template, errors.ECODE_INVAL)
8338
8339       if (instance.disk_template,
8340           self.op.disk_template) not in self._DISK_CONVERSIONS:
8341         raise errors.OpPrereqError("Unsupported disk template conversion from"
8342                                    " %s to %s" % (instance.disk_template,
8343                                                   self.op.disk_template),
8344                                    errors.ECODE_INVAL)
8345       if self.op.disk_template in constants.DTS_NET_MIRROR:
8346         _CheckNodeOnline(self, self.op.remote_node)
8347         _CheckNodeNotDrained(self, self.op.remote_node)
8348         disks = [{"size": d.size} for d in instance.disks]
8349         required = _ComputeDiskSize(self.op.disk_template, disks)
8350         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8351         _CheckInstanceDown(self, instance, "cannot change disk template")
8352
8353     # hvparams processing
8354     if self.op.hvparams:
8355       i_hvdict, hv_new = self._GetUpdatedParams(
8356                              instance.hvparams, self.op.hvparams,
8357                              cluster.hvparams[instance.hypervisor],
8358                              constants.HVS_PARAMETER_TYPES)
8359       # local check
8360       hypervisor.GetHypervisor(
8361         instance.hypervisor).CheckParameterSyntax(hv_new)
8362       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8363       self.hv_new = hv_new # the new actual values
8364       self.hv_inst = i_hvdict # the new dict (without defaults)
8365     else:
8366       self.hv_new = self.hv_inst = {}
8367
8368     # beparams processing
8369     if self.op.beparams:
8370       i_bedict, be_new = self._GetUpdatedParams(
8371                              instance.beparams, self.op.beparams,
8372                              cluster.beparams[constants.PP_DEFAULT],
8373                              constants.BES_PARAMETER_TYPES)
8374       self.be_new = be_new # the new actual values
8375       self.be_inst = i_bedict # the new dict (without defaults)
8376     else:
8377       self.be_new = self.be_inst = {}
8378
8379     self.warn = []
8380
8381     if constants.BE_MEMORY in self.op.beparams and not self.force:
8382       mem_check_list = [pnode]
8383       if be_new[constants.BE_AUTO_BALANCE]:
8384         # either we changed auto_balance to yes or it was from before
8385         mem_check_list.extend(instance.secondary_nodes)
8386       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8387                                                   instance.hypervisor)
8388       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8389                                          instance.hypervisor)
8390       pninfo = nodeinfo[pnode]
8391       msg = pninfo.fail_msg
8392       if msg:
8393         # Assume the primary node is unreachable and go ahead
8394         self.warn.append("Can't get info from primary node %s: %s" %
8395                          (pnode,  msg))
8396       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8397         self.warn.append("Node data from primary node %s doesn't contain"
8398                          " free memory information" % pnode)
8399       elif instance_info.fail_msg:
8400         self.warn.append("Can't get instance runtime information: %s" %
8401                         instance_info.fail_msg)
8402       else:
8403         if instance_info.payload:
8404           current_mem = int(instance_info.payload['memory'])
8405         else:
8406           # Assume instance not running
8407           # (there is a slight race condition here, but it's not very probable,
8408           # and we have no other way to check)
8409           current_mem = 0
8410         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8411                     pninfo.payload['memory_free'])
8412         if miss_mem > 0:
8413           raise errors.OpPrereqError("This change will prevent the instance"
8414                                      " from starting, due to %d MB of memory"
8415                                      " missing on its primary node" % miss_mem,
8416                                      errors.ECODE_NORES)
8417
8418       if be_new[constants.BE_AUTO_BALANCE]:
8419         for node, nres in nodeinfo.items():
8420           if node not in instance.secondary_nodes:
8421             continue
8422           msg = nres.fail_msg
8423           if msg:
8424             self.warn.append("Can't get info from secondary node %s: %s" %
8425                              (node, msg))
8426           elif not isinstance(nres.payload.get('memory_free', None), int):
8427             self.warn.append("Secondary node %s didn't return free"
8428                              " memory information" % node)
8429           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8430             self.warn.append("Not enough memory to failover instance to"
8431                              " secondary node %s" % node)
8432
8433     # NIC processing
8434     self.nic_pnew = {}
8435     self.nic_pinst = {}
8436     for nic_op, nic_dict in self.op.nics:
8437       if nic_op == constants.DDM_REMOVE:
8438         if not instance.nics:
8439           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8440                                      errors.ECODE_INVAL)
8441         continue
8442       if nic_op != constants.DDM_ADD:
8443         # an existing nic
8444         if not instance.nics:
8445           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8446                                      " no NICs" % nic_op,
8447                                      errors.ECODE_INVAL)
8448         if nic_op < 0 or nic_op >= len(instance.nics):
8449           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8450                                      " are 0 to %d" %
8451                                      (nic_op, len(instance.nics) - 1),
8452                                      errors.ECODE_INVAL)
8453         old_nic_params = instance.nics[nic_op].nicparams
8454         old_nic_ip = instance.nics[nic_op].ip
8455       else:
8456         old_nic_params = {}
8457         old_nic_ip = None
8458
8459       update_params_dict = dict([(key, nic_dict[key])
8460                                  for key in constants.NICS_PARAMETERS
8461                                  if key in nic_dict])
8462
8463       if 'bridge' in nic_dict:
8464         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8465
8466       new_nic_params, new_filled_nic_params = \
8467           self._GetUpdatedParams(old_nic_params, update_params_dict,
8468                                  cluster.nicparams[constants.PP_DEFAULT],
8469                                  constants.NICS_PARAMETER_TYPES)
8470       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8471       self.nic_pinst[nic_op] = new_nic_params
8472       self.nic_pnew[nic_op] = new_filled_nic_params
8473       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8474
8475       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8476         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8477         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8478         if msg:
8479           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8480           if self.force:
8481             self.warn.append(msg)
8482           else:
8483             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8484       if new_nic_mode == constants.NIC_MODE_ROUTED:
8485         if 'ip' in nic_dict:
8486           nic_ip = nic_dict['ip']
8487         else:
8488           nic_ip = old_nic_ip
8489         if nic_ip is None:
8490           raise errors.OpPrereqError('Cannot set the nic ip to None'
8491                                      ' on a routed nic', errors.ECODE_INVAL)
8492       if 'mac' in nic_dict:
8493         nic_mac = nic_dict['mac']
8494         if nic_mac is None:
8495           raise errors.OpPrereqError('Cannot set the nic mac to None',
8496                                      errors.ECODE_INVAL)
8497         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8498           # otherwise generate the mac
8499           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8500         else:
8501           # or validate/reserve the current one
8502           try:
8503             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8504           except errors.ReservationError:
8505             raise errors.OpPrereqError("MAC address %s already in use"
8506                                        " in cluster" % nic_mac,
8507                                        errors.ECODE_NOTUNIQUE)
8508
8509     # DISK processing
8510     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8511       raise errors.OpPrereqError("Disk operations not supported for"
8512                                  " diskless instances",
8513                                  errors.ECODE_INVAL)
8514     for disk_op, _ in self.op.disks:
8515       if disk_op == constants.DDM_REMOVE:
8516         if len(instance.disks) == 1:
8517           raise errors.OpPrereqError("Cannot remove the last disk of"
8518                                      " an instance", errors.ECODE_INVAL)
8519         _CheckInstanceDown(self, instance, "cannot remove disks")
8520
8521       if (disk_op == constants.DDM_ADD and
8522           len(instance.nics) >= constants.MAX_DISKS):
8523         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8524                                    " add more" % constants.MAX_DISKS,
8525                                    errors.ECODE_STATE)
8526       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8527         # an existing disk
8528         if disk_op < 0 or disk_op >= len(instance.disks):
8529           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8530                                      " are 0 to %d" %
8531                                      (disk_op, len(instance.disks)),
8532                                      errors.ECODE_INVAL)
8533
8534     # OS change
8535     if self.op.os_name and not self.op.force:
8536       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8537                       self.op.force_variant)
8538
8539     return
8540
8541   def _ConvertPlainToDrbd(self, feedback_fn):
8542     """Converts an instance from plain to drbd.
8543
8544     """
8545     feedback_fn("Converting template to drbd")
8546     instance = self.instance
8547     pnode = instance.primary_node
8548     snode = self.op.remote_node
8549
8550     # create a fake disk info for _GenerateDiskTemplate
8551     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8552     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8553                                       instance.name, pnode, [snode],
8554                                       disk_info, None, None, 0)
8555     info = _GetInstanceInfoText(instance)
8556     feedback_fn("Creating aditional volumes...")
8557     # first, create the missing data and meta devices
8558     for disk in new_disks:
8559       # unfortunately this is... not too nice
8560       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8561                             info, True)
8562       for child in disk.children:
8563         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8564     # at this stage, all new LVs have been created, we can rename the
8565     # old ones
8566     feedback_fn("Renaming original volumes...")
8567     rename_list = [(o, n.children[0].logical_id)
8568                    for (o, n) in zip(instance.disks, new_disks)]
8569     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8570     result.Raise("Failed to rename original LVs")
8571
8572     feedback_fn("Initializing DRBD devices...")
8573     # all child devices are in place, we can now create the DRBD devices
8574     for disk in new_disks:
8575       for node in [pnode, snode]:
8576         f_create = node == pnode
8577         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8578
8579     # at this point, the instance has been modified
8580     instance.disk_template = constants.DT_DRBD8
8581     instance.disks = new_disks
8582     self.cfg.Update(instance, feedback_fn)
8583
8584     # disks are created, waiting for sync
8585     disk_abort = not _WaitForSync(self, instance)
8586     if disk_abort:
8587       raise errors.OpExecError("There are some degraded disks for"
8588                                " this instance, please cleanup manually")
8589
8590   def _ConvertDrbdToPlain(self, feedback_fn):
8591     """Converts an instance from drbd to plain.
8592
8593     """
8594     instance = self.instance
8595     assert len(instance.secondary_nodes) == 1
8596     pnode = instance.primary_node
8597     snode = instance.secondary_nodes[0]
8598     feedback_fn("Converting template to plain")
8599
8600     old_disks = instance.disks
8601     new_disks = [d.children[0] for d in old_disks]
8602
8603     # copy over size and mode
8604     for parent, child in zip(old_disks, new_disks):
8605       child.size = parent.size
8606       child.mode = parent.mode
8607
8608     # update instance structure
8609     instance.disks = new_disks
8610     instance.disk_template = constants.DT_PLAIN
8611     self.cfg.Update(instance, feedback_fn)
8612
8613     feedback_fn("Removing volumes on the secondary node...")
8614     for disk in old_disks:
8615       self.cfg.SetDiskID(disk, snode)
8616       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8617       if msg:
8618         self.LogWarning("Could not remove block device %s on node %s,"
8619                         " continuing anyway: %s", disk.iv_name, snode, msg)
8620
8621     feedback_fn("Removing unneeded volumes on the primary node...")
8622     for idx, disk in enumerate(old_disks):
8623       meta = disk.children[1]
8624       self.cfg.SetDiskID(meta, pnode)
8625       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8626       if msg:
8627         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8628                         " continuing anyway: %s", idx, pnode, msg)
8629
8630
8631   def Exec(self, feedback_fn):
8632     """Modifies an instance.
8633
8634     All parameters take effect only at the next restart of the instance.
8635
8636     """
8637     # Process here the warnings from CheckPrereq, as we don't have a
8638     # feedback_fn there.
8639     for warn in self.warn:
8640       feedback_fn("WARNING: %s" % warn)
8641
8642     result = []
8643     instance = self.instance
8644     # disk changes
8645     for disk_op, disk_dict in self.op.disks:
8646       if disk_op == constants.DDM_REMOVE:
8647         # remove the last disk
8648         device = instance.disks.pop()
8649         device_idx = len(instance.disks)
8650         for node, disk in device.ComputeNodeTree(instance.primary_node):
8651           self.cfg.SetDiskID(disk, node)
8652           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8653           if msg:
8654             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8655                             " continuing anyway", device_idx, node, msg)
8656         result.append(("disk/%d" % device_idx, "remove"))
8657       elif disk_op == constants.DDM_ADD:
8658         # add a new disk
8659         if instance.disk_template == constants.DT_FILE:
8660           file_driver, file_path = instance.disks[0].logical_id
8661           file_path = os.path.dirname(file_path)
8662         else:
8663           file_driver = file_path = None
8664         disk_idx_base = len(instance.disks)
8665         new_disk = _GenerateDiskTemplate(self,
8666                                          instance.disk_template,
8667                                          instance.name, instance.primary_node,
8668                                          instance.secondary_nodes,
8669                                          [disk_dict],
8670                                          file_path,
8671                                          file_driver,
8672                                          disk_idx_base)[0]
8673         instance.disks.append(new_disk)
8674         info = _GetInstanceInfoText(instance)
8675
8676         logging.info("Creating volume %s for instance %s",
8677                      new_disk.iv_name, instance.name)
8678         # Note: this needs to be kept in sync with _CreateDisks
8679         #HARDCODE
8680         for node in instance.all_nodes:
8681           f_create = node == instance.primary_node
8682           try:
8683             _CreateBlockDev(self, node, instance, new_disk,
8684                             f_create, info, f_create)
8685           except errors.OpExecError, err:
8686             self.LogWarning("Failed to create volume %s (%s) on"
8687                             " node %s: %s",
8688                             new_disk.iv_name, new_disk, node, err)
8689         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8690                        (new_disk.size, new_disk.mode)))
8691       else:
8692         # change a given disk
8693         instance.disks[disk_op].mode = disk_dict['mode']
8694         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8695
8696     if self.op.disk_template:
8697       r_shut = _ShutdownInstanceDisks(self, instance)
8698       if not r_shut:
8699         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8700                                  " proceed with disk template conversion")
8701       mode = (instance.disk_template, self.op.disk_template)
8702       try:
8703         self._DISK_CONVERSIONS[mode](self, feedback_fn)
8704       except:
8705         self.cfg.ReleaseDRBDMinors(instance.name)
8706         raise
8707       result.append(("disk_template", self.op.disk_template))
8708
8709     # NIC changes
8710     for nic_op, nic_dict in self.op.nics:
8711       if nic_op == constants.DDM_REMOVE:
8712         # remove the last nic
8713         del instance.nics[-1]
8714         result.append(("nic.%d" % len(instance.nics), "remove"))
8715       elif nic_op == constants.DDM_ADD:
8716         # mac and bridge should be set, by now
8717         mac = nic_dict['mac']
8718         ip = nic_dict.get('ip', None)
8719         nicparams = self.nic_pinst[constants.DDM_ADD]
8720         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8721         instance.nics.append(new_nic)
8722         result.append(("nic.%d" % (len(instance.nics) - 1),
8723                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
8724                        (new_nic.mac, new_nic.ip,
8725                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8726                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8727                        )))
8728       else:
8729         for key in 'mac', 'ip':
8730           if key in nic_dict:
8731             setattr(instance.nics[nic_op], key, nic_dict[key])
8732         if nic_op in self.nic_pinst:
8733           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8734         for key, val in nic_dict.iteritems():
8735           result.append(("nic.%s/%d" % (key, nic_op), val))
8736
8737     # hvparams changes
8738     if self.op.hvparams:
8739       instance.hvparams = self.hv_inst
8740       for key, val in self.op.hvparams.iteritems():
8741         result.append(("hv/%s" % key, val))
8742
8743     # beparams changes
8744     if self.op.beparams:
8745       instance.beparams = self.be_inst
8746       for key, val in self.op.beparams.iteritems():
8747         result.append(("be/%s" % key, val))
8748
8749     # OS change
8750     if self.op.os_name:
8751       instance.os = self.op.os_name
8752
8753     self.cfg.Update(instance, feedback_fn)
8754
8755     return result
8756
  # Dispatch table for disk template conversions: maps a pair of
  # (current disk template, requested disk template) to the function
  # implementing that conversion. Exec looks the handler up with this
  # pair and calls it as handler(self, feedback_fn).
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8761
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports present on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks on the requested (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      # no explicit node list means every node in the cluster
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Remember the nodes whose locks were actually acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Collect the export list of every locked node.

    @rtype: dict
    @return: a dictionary mapping each node name to the payload of its
        export-list RPC call, or to False if the call to that node
        failed

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
8802
8803
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The export snapshots every disk of the instance on its primary node,
  copies the snapshots to the target node, finalizes the export there
  and finally removes exports of the same instance from other nodes.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; the cluster-wide
    default is used when it is missing.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @rtype: tuple
    @return: (environment dict, pre-hook node list, post-hook node list);
        the same node list is used for both phases

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary node is online, that the target node is online and not
    drained, and that the instance has no file-based disks.

    @raise errors.OpPrereqError: if the instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    A failure to snapshot a disk is only logged as a warning; the
    corresponding entry of the result is then False.

    @param feedback_fn: function used to report progress
    @rtype: list
    @return: list with one entry per instance disk, in disk order: the
        snapshot as an L{objects.Disk} instance, or False if the
        snapshot could not be created

    """
    instance = self.instance
    src_node = instance.primary_node

    vgname = self.cfg.GetVGName()

    snap_disks = []

    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
                  (idx, src_node))

      # result.payload will be a snapshot of an lvm leaf of the one we
      # passed
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                        idx, src_node, msg)
        # keep the list aligned with instance.disks
        snap_disks.append(False)
      else:
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

    return snap_disks

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @param feedback_fn: function used to report progress
    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
                       L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed
    @rtype: bool
    @return: Whether removal was successful or not; False is also
        returned when there is no snapshot at the given index

    """
    disk = snap_disks[disk_index]
    if disk:
      src_node = self.instance.primary_node

      feedback_fn("Removing snapshot of disk/%s on node %s" %
                  (disk_index, src_node))

      result = self.rpc.call_blockdev_remove(src_node, disk)
      if not result.fail_msg:
        return True

      self.LogWarning("Could not remove snapshot for disk/%d from node"
                      " %s: %s", disk_index, src_node, result.fail_msg)

    return False

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    @param feedback_fn: function used to report progress

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if
    # we proceeded, the backup just made would be removed, because
    # OpQueryExports substitutes an empty list with the full cluster
    # node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # nodes that cannot be queried are silently skipped
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to report progress
    @rtype: tuple
    @return: (finalization status, per-disk status list); the first item
        is a boolean telling whether finalizing the export on the target
        node succeeded, the second a list of booleans, one per instance
        disk, telling whether that disk was exported
    @raise errors.OpExecError: if the instance cannot be started again
        after the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      removed_snaps = [False] * len(instance.disks)

      snap_disks = None
      try:
        try:
          snap_disks = self._CreateSnapshots(feedback_fn)
        finally:
          # restart the instance as soon as the snapshots are taken (or
          # have failed), to keep its downtime short
          if self.op.shutdown and instance.admin_up:
            feedback_fn("Starting instance %s" % instance.name)
            result = self.rpc.call_instance_start(src_node, instance,
                                                  None, None)
            msg = result.fail_msg
            if msg:
              _ShutdownInstanceDisks(self, instance)
              raise errors.OpExecError("Could not start instance: %s" % msg)

        assert len(snap_disks) == len(instance.disks)
        assert len(removed_snaps) == len(instance.disks)

        # TODO: check for size

        cluster_name = self.cfg.GetClusterName()
        for idx, dev in enumerate(snap_disks):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
            # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                   instance, cluster_name,
                                                   idx, self.op.debug_level)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
                              " node %s: %s", idx, src_node, dst_node.name, msg)
              dresults.append(False)
            else:
              dresults.append(True)

            # Remove snapshot
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
              removed_snaps[idx] = True
          else:
            # no snapshot could be taken for this disk
            dresults.append(False)

        assert len(dresults) == len(instance.disks)

        # Check for backwards compatibility
        assert compat.all(isinstance(i, bool) for i in dresults), \
               "Not all results are boolean: %r" % dresults

        feedback_fn("Finalizing export on %s" % dst_node.name)
        result = self.rpc.call_finalize_export(dst_node.name, instance,
                                               snap_disks)
        msg = result.fail_msg
        fin_resu = not msg
        if msg:
          self.LogWarning("Could not finalize export for instance %s"
                          " on node %s: %s", instance.name, dst_node.name, msg)

      finally:
        # Remove all snapshots that are still around. If creating the
        # snapshots raised, snap_disks is still None and there is nothing
        # to remove; indexing it here would raise a TypeError that hides
        # the original error.
        if snap_disks is not None:
          assert len(removed_snaps) == len(instance.disks)
          for idx, removed in enumerate(removed_snaps):
            if not removed:
              self._RemoveSnapshot(feedback_fn, snap_disks, idx)

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # NOTE(review): older exports are removed even when this export
    # reported failures in fin_resu/dresults -- confirm this is intended
    self._CleanupExports(feedback_fn)

    return fin_resu, dresults
9070
9071
class LURemoveExport(NoHooksLU):
  """Logical unit deleting every export of a given instance name.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes.

    """
    # All nodes must be locked for the removal to work; the instance
    # itself is not locked, since nothing happens to it (and exports of
    # an already removed instance can be deleted too)
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  def Exec(self, feedback_fn):
    """Remove the exports of the instance from all locked nodes.

    Nodes that cannot be queried are skipped with a warning; a failed
    removal is logged as an error.

    """
    requested = self.op.instance_name
    iname = self.cfg.ExpandInstanceName(requested)
    # An unknown instance is looked up under the name passed in, which
    # can only match if that name was already an FQDN
    unresolved = not iname
    if unresolved:
      iname = requested

    nodes = self.acquired_locks[locking.LEVEL_NODE]
    export_found = False
    for node, nres in self.rpc.call_export_list(nodes).items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if iname not in nres.payload:
        continue
      export_found = True
      remove_msg = self.rpc.call_export_remove(node, iname).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", iname, node, remove_msg)

    if unresolved and not export_found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
9123
9124
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Common base for the tag-handling logical units.

  This class is abstract; the concrete tags LUs derive from it and
  inherit the name expansion, locking and target lookup.

  """

  def ExpandNames(self):
    """Expand the object name and lock it, for node and instance tags.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      level, expand_fn = locking.LEVEL_NODE, _ExpandNodeName
    elif kind == constants.TAG_INSTANCE:
      level, expand_fn = locking.LEVEL_INSTANCE, _ExpandInstanceName
    else:
      # any other kind needs neither expansion nor locks
      return
    self.op.name = expand_fn(self.cfg, self.op.name)
    self.needed_locks[level] = self.op.name

  def CheckPrereq(self):
    """Look up the object the tag operation applies to.

    The object is stored in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not cluster, node or
        instance

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
9154
9155
class LUGetTags(TagsLU):
  """Logical unit returning the tags of one object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    return [tag for tag in self.target.GetTags()]
9168
9169
9170 class LUSearchTags(NoHooksLU):
9171   """Searches the tags for a given pattern.
9172
9173   """
9174   _OP_REQP = ["pattern"]
9175   REQ_BGL = False
9176
9177   def ExpandNames(self):
9178     self.needed_locks = {}
9179
9180   def CheckPrereq(self):
9181     """Check prerequisites.
9182
9183     This checks the pattern passed for validity by compiling it.
9184
9185     """
9186     try:
9187       self.re = re.compile(self.op.pattern)
9188     except re.error, err:
9189       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9190                                  (self.op.pattern, err), errors.ECODE_INVAL)
9191
9192   def Exec(self, feedback_fn):
9193     """Returns the tag list.
9194
9195     """
9196     cfg = self.cfg
9197     tgts = [("/cluster", cfg.GetClusterInfo())]
9198     ilist = cfg.GetAllInstancesInfo().values()
9199     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9200     nlist = cfg.GetAllNodesInfo().values()
9201     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9202     results = []
9203     for path, target in tgts:
9204       for tag in target.GetTags():
9205         if self.re.search(tag):
9206           results.append((path, tag))
9207     return results
9208
9209
9210 class LUAddTags(TagsLU):
9211   """Sets a tag on a given object.
9212
9213   """
9214   _OP_REQP = ["kind", "name", "tags"]
9215   REQ_BGL = False
9216
9217   def CheckPrereq(self):
9218     """Check prerequisites.
9219
9220     This checks the type and length of the tag name and value.
9221
9222     """
9223     TagsLU.CheckPrereq(self)
9224     for tag in self.op.tags:
9225       objects.TaggableObject.ValidateTag(tag)
9226
9227   def Exec(self, feedback_fn):
9228     """Sets the tag.
9229
9230     """
9231     try:
9232       for tag in self.op.tags:
9233         self.target.AddTag(tag)
9234     except errors.TagError, err:
9235       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9236     self.cfg.Update(self.target, feedback_fn)
9237
9238
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Validate the tags and verify the target currently has them all.

    @raise errors.OpPrereqError: if any tag is not set on the target

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    wanted = frozenset(self.op.tags)
    missing = wanted - self.target.GetTags()
    if missing:
      quoted = sorted("'%s'" % name for name in missing)
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Drop the tags from the target and save it to the configuration.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)
    self.cfg.Update(target, feedback_fn)
9271
9272
class LUTestDelay(NoHooksLU):
  """Logical unit that just waits for a given duration.

  The delay can be run on the master, on a list of nodes, or on both.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node list, if any, and lock those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    expanded = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = expanded
    self.needed_locks[locking.LEVEL_NODE] = expanded

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  def Exec(self, feedback_fn):
    """Run the delay on the master and/or on the requested nodes.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if not self.op.on_nodes:
      return

    node_results = self.rpc.call_test_delay(self.op.on_nodes,
                                            self.op.duration)
    for node, nres in node_results.items():
      nres.Raise("Failure during rpc call to node %s" % node)
9313
9314
9315 class IAllocator(object):
9316   """IAllocator framework.
9317
  An IAllocator instance has four sets of attributes:
    - cfg and rpc, which are needed to query the cluster
    - input data (all members of the key list matching the mode, i.e.
      _ALLO_KEYS, _RELO_KEYS or _EVAC_KEYS, are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
9326
9327   """
9328   # pylint: disable-msg=R0902
9329   # lots of instance attributes
9330   _ALLO_KEYS = [
9331     "name", "mem_size", "disks", "disk_template",
9332     "os", "tags", "nics", "vcpus", "hypervisor",
9333     ]
9334   _RELO_KEYS = [
9335     "name", "relocate_from",
9336     ]
9337   _EVAC_KEYS = [
9338     "evac_nodes",
9339     ]
9340
9341   def __init__(self, cfg, rpc, mode, **kwargs):
9342     self.cfg = cfg
9343     self.rpc = rpc
9344     # init buffer variables
9345     self.in_text = self.out_text = self.in_data = self.out_data = None
9346     # init all input fields so that pylint is happy
9347     self.mode = mode
9348     self.mem_size = self.disks = self.disk_template = None
9349     self.os = self.tags = self.nics = self.vcpus = None
9350     self.hypervisor = None
9351     self.relocate_from = None
9352     self.name = None
9353     self.evac_nodes = None
9354     # computed fields
9355     self.required_nodes = None
9356     # init result fields
9357     self.success = self.info = self.result = None
9358     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9359       keyset = self._ALLO_KEYS
9360       fn = self._AddNewInstance
9361     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9362       keyset = self._RELO_KEYS
9363       fn = self._AddRelocateInstance
9364     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9365       keyset = self._EVAC_KEYS
9366       fn = self._AddEvacuateNodes
9367     else:
9368       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9369                                    " IAllocator" % self.mode)
9370     for key in kwargs:
9371       if key not in keyset:
9372         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9373                                      " IAllocator" % key)
9374       setattr(self, key, kwargs[key])
9375
9376     for key in keyset:
9377       if key not in kwargs:
9378         raise errors.ProgrammerError("Missing input parameter '%s' to"
9379                                      " IAllocator" % key)
9380     self._BuildInputData(fn)
9381
9382   def _ComputeClusterData(self):
9383     """Compute the generic allocator input data.
9384
9385     This is the data that is independent of the actual operation.
9386
9387     """
9388     cfg = self.cfg
9389     cluster_info = cfg.GetClusterInfo()
9390     # cluster data
9391     data = {
9392       "version": constants.IALLOCATOR_VERSION,
9393       "cluster_name": cfg.GetClusterName(),
9394       "cluster_tags": list(cluster_info.GetTags()),
9395       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9396       # we don't have job IDs
9397       }
9398     iinfo = cfg.GetAllInstancesInfo().values()
9399     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9400
9401     # node data
9402     node_results = {}
9403     node_list = cfg.GetNodeList()
9404
9405     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9406       hypervisor_name = self.hypervisor
9407     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9408       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9409     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9410       hypervisor_name = cluster_info.enabled_hypervisors[0]
9411
9412     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9413                                         hypervisor_name)
9414     node_iinfo = \
9415       self.rpc.call_all_instances_info(node_list,
9416                                        cluster_info.enabled_hypervisors)
9417     for nname, nresult in node_data.items():
9418       # first fill in static (config-based) values
9419       ninfo = cfg.GetNodeInfo(nname)
9420       pnr = {
9421         "tags": list(ninfo.GetTags()),
9422         "primary_ip": ninfo.primary_ip,
9423         "secondary_ip": ninfo.secondary_ip,
9424         "offline": ninfo.offline,
9425         "drained": ninfo.drained,
9426         "master_candidate": ninfo.master_candidate,
9427         }
9428
9429       if not (ninfo.offline or ninfo.drained):
9430         nresult.Raise("Can't get data for node %s" % nname)
9431         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9432                                 nname)
9433         remote_info = nresult.payload
9434
9435         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9436                      'vg_size', 'vg_free', 'cpu_total']:
9437           if attr not in remote_info:
9438             raise errors.OpExecError("Node '%s' didn't return attribute"
9439                                      " '%s'" % (nname, attr))
9440           if not isinstance(remote_info[attr], int):
9441             raise errors.OpExecError("Node '%s' returned invalid value"
9442                                      " for '%s': %s" %
9443                                      (nname, attr, remote_info[attr]))
9444         # compute memory used by primary instances
9445         i_p_mem = i_p_up_mem = 0
9446         for iinfo, beinfo in i_list:
9447           if iinfo.primary_node == nname:
9448             i_p_mem += beinfo[constants.BE_MEMORY]
9449             if iinfo.name not in node_iinfo[nname].payload:
9450               i_used_mem = 0
9451             else:
9452               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9453             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9454             remote_info['memory_free'] -= max(0, i_mem_diff)
9455
9456             if iinfo.admin_up:
9457               i_p_up_mem += beinfo[constants.BE_MEMORY]
9458
9459         # compute memory used by instances
9460         pnr_dyn = {
9461           "total_memory": remote_info['memory_total'],
9462           "reserved_memory": remote_info['memory_dom0'],
9463           "free_memory": remote_info['memory_free'],
9464           "total_disk": remote_info['vg_size'],
9465           "free_disk": remote_info['vg_free'],
9466           "total_cpus": remote_info['cpu_total'],
9467           "i_pri_memory": i_p_mem,
9468           "i_pri_up_memory": i_p_up_mem,
9469           }
9470         pnr.update(pnr_dyn)
9471
9472       node_results[nname] = pnr
9473     data["nodes"] = node_results
9474
9475     # instance data
9476     instance_data = {}
9477     for iinfo, beinfo in i_list:
9478       nic_data = []
9479       for nic in iinfo.nics:
9480         filled_params = objects.FillDict(
9481             cluster_info.nicparams[constants.PP_DEFAULT],
9482             nic.nicparams)
9483         nic_dict = {"mac": nic.mac,
9484                     "ip": nic.ip,
9485                     "mode": filled_params[constants.NIC_MODE],
9486                     "link": filled_params[constants.NIC_LINK],
9487                    }
9488         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9489           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9490         nic_data.append(nic_dict)
9491       pir = {
9492         "tags": list(iinfo.GetTags()),
9493         "admin_up": iinfo.admin_up,
9494         "vcpus": beinfo[constants.BE_VCPUS],
9495         "memory": beinfo[constants.BE_MEMORY],
9496         "os": iinfo.os,
9497         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9498         "nics": nic_data,
9499         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9500         "disk_template": iinfo.disk_template,
9501         "hypervisor": iinfo.hypervisor,
9502         }
9503       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9504                                                  pir["disks"])
9505       instance_data[iinfo.name] = pir
9506
9507     data["instances"] = instance_data
9508
9509     self.in_data = data
9510
9511   def _AddNewInstance(self):
9512     """Add new instance data to allocator structure.
9513
9514     This in combination with _AllocatorGetClusterData will create the
9515     correct structure needed as input for the allocator.
9516
9517     The checks for the completeness of the opcode must have already been
9518     done.
9519
9520     """
9521     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9522
9523     if self.disk_template in constants.DTS_NET_MIRROR:
9524       self.required_nodes = 2
9525     else:
9526       self.required_nodes = 1
9527     request = {
9528       "name": self.name,
9529       "disk_template": self.disk_template,
9530       "tags": self.tags,
9531       "os": self.os,
9532       "vcpus": self.vcpus,
9533       "memory": self.mem_size,
9534       "disks": self.disks,
9535       "disk_space_total": disk_space,
9536       "nics": self.nics,
9537       "required_nodes": self.required_nodes,
9538       }
9539     return request
9540
9541   def _AddRelocateInstance(self):
9542     """Add relocate instance data to allocator structure.
9543
9544     This in combination with _IAllocatorGetClusterData will create the
9545     correct structure needed as input for the allocator.
9546
9547     The checks for the completeness of the opcode must have already been
9548     done.
9549
9550     """
9551     instance = self.cfg.GetInstanceInfo(self.name)
9552     if instance is None:
9553       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9554                                    " IAllocator" % self.name)
9555
9556     if instance.disk_template not in constants.DTS_NET_MIRROR:
9557       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9558                                  errors.ECODE_INVAL)
9559
9560     if len(instance.secondary_nodes) != 1:
9561       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9562                                  errors.ECODE_STATE)
9563
9564     self.required_nodes = 1
9565     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9566     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9567
9568     request = {
9569       "name": self.name,
9570       "disk_space_total": disk_space,
9571       "required_nodes": self.required_nodes,
9572       "relocate_from": self.relocate_from,
9573       }
9574     return request
9575
9576   def _AddEvacuateNodes(self):
9577     """Add evacuate nodes data to allocator structure.
9578
9579     """
9580     request = {
9581       "evac_nodes": self.evac_nodes
9582       }
9583     return request
9584
9585   def _BuildInputData(self, fn):
9586     """Build input data structures.
9587
9588     """
9589     self._ComputeClusterData()
9590
9591     request = fn()
9592     request["type"] = self.mode
9593     self.in_data["request"] = request
9594
9595     self.in_text = serializer.Dump(self.in_data)
9596
9597   def Run(self, name, validate=True, call_fn=None):
9598     """Run an instance allocator and return the results.
9599
9600     """
9601     if call_fn is None:
9602       call_fn = self.rpc.call_iallocator_runner
9603
9604     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9605     result.Raise("Failure while running the iallocator script")
9606
9607     self.out_text = result.payload
9608     if validate:
9609       self._ValidateResult()
9610
9611   def _ValidateResult(self):
9612     """Process the allocator results.
9613
9614     This will process and if successful save the result in
9615     self.out_data and the other parameters.
9616
9617     """
9618     try:
9619       rdict = serializer.Load(self.out_text)
9620     except Exception, err:
9621       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9622
9623     if not isinstance(rdict, dict):
9624       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9625
9626     # TODO: remove backwards compatiblity in later versions
9627     if "nodes" in rdict and "result" not in rdict:
9628       rdict["result"] = rdict["nodes"]
9629       del rdict["nodes"]
9630
9631     for key in "success", "info", "result":
9632       if key not in rdict:
9633         raise errors.OpExecError("Can't parse iallocator results:"
9634                                  " missing key '%s'" % key)
9635       setattr(self, key, rdict[key])
9636
9637     if not isinstance(rdict["result"], list):
9638       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9639                                " is not a list")
9640     self.out_data = rdict
9641
9642
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds the input for an instance allocator and, depending on
  the requested direction, either returns that input text or runs the
  named allocator on it and returns the allocator's unvalidated output.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if an opcode attribute needed by the
        selected mode is missing or malformed, if the instance to be
        allocated already exists, or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # allocation is only tested for names not yet used in the cluster
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # fall back to the hypervisor type returned by the configuration
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # running the allocator requires knowing which one to run
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function; not used by this LU
    @return: the allocator input text for direction
        C{constants.IALLOCATOR_DIR_IN}, otherwise the unvalidated output
        text of the allocator named by C{self.op.allocator}
    @raise errors.ProgrammerError: if the mode is not one of the modes
        accepted by L{CheckPrereq}

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated with '%'; passing it as a second
      # constructor argument would leave a literal '%s' in the message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is returned, so it is deliberately not validated
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result