# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs


def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


#: The without-default default value
_NoDefault = object()


#: The no-type (value too complex to check it in the type system)
_NoType = object()


# Some basic types
def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


# Container types
def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


# Combinator types
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or none)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a positive integer (zero is allowed)
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)

#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))
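

# Illustrative example (not part of the original module): the checkers above
# compose into arbitrarily nested validators. Assuming only the definitions
# in this module:
#
#   _TStrIntListMap = _TDictOf(_TNonEmptyString, _TListOf(_TInt))
#   _TStrIntListMap({"a": [1, 2]})   # => True
#   _TStrIntListMap({"": [1]})       # => False (empty key)
#   _TStrIntListMap({"a": "nope"})   # => False (value is not a list of ints)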


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)

#: the force parameter
_PForce = ("force", False, _TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None, _TOr(_TNone,
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
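

# Illustrative example (not part of the original module): each _P* tuple is
# (attribute name, default value or _NoDefault, type check) and is consumed
# by LogicalUnit.__init__ below. A hypothetical LU would declare:
#
#   class LUExampleOp(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,                  # required, must be a non-empty string
#       _PForce,                         # optional, defaults to False
#       ("timeout", 60, _TPositiveInt),  # optional, validated as int >= 0
#       ]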


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and the types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if aval == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
332 """Returns the SshRunner object
336 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
339 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
594 """Tasklet base class.
596 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
597 they can mix legacy code with tasklets. Locking needs to be done in the LU,
598 tasklets know nothing about locks.
600 Subclasses must follow these rules:
601 - Implement CheckPrereq
605 def __init__(self, lu):
612 def CheckPrereq(self):
613 """Check prerequisites for this tasklets.
615 This method should check whether the prerequisites for the execution of
616 this tasklet are fulfilled. It can do internode communication, but it
617 should be idempotent - no cluster or system changes are allowed.
619 The method should raise errors.OpPrereqError in case something is not
620 fulfilled. Its return value is ignored.
622 This method should also update all parameters to their canonical form if it
623 hasn't been done before.
628 def Exec(self, feedback_fn):
629 """Execute the tasklet.
631 This method should implement the actual work. It should raise
632 errors.OpExecError for failures that are somewhat dealt with in code, or
636 raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
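

# Illustrative example (not part of the original module): resetting one key
# and overriding another, assuming the definitions above:
#
#   old = {"mem": 512, "vcpus": 2}
#   _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   # => {"vcpus": 4} -- "mem" reverts to the cluster default via deletion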


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
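

# Illustrative example (not part of the original module): expansion turns a
# short name into the canonical one, or raises OpPrereqError. The names are
# hypothetical:
#
#   _ExpandInstanceName(cfg, "web1")   # => "web1.example.com" if configured
#   _ExpandInstanceName(cfg, "bogus")  # => raises "Instance 'bogus' not known"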


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
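

# Illustrative example (not part of the original module): for an instance
# with one bridged NIC, the environment built above would contain, among
# others:
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=<the NIC's link>
#
# The hooks runner later prefixes these keys with "GANETI_" (see
# LogicalUnit.BuildHooksEnv above).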


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
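

# Illustrative example (not part of the original module): on a hypothetical
# three-node cluster with candidate_pool_size = 10, where all three nodes are
# already candidates (mc_now = mc_should = 3), adding a node gives
# mc_should = min(3 + 1, 10) = 4, so mc_now (3) < 4 and the new node
# promotes itself.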


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", _EmptyList,
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, _TBool),
    ("error_codes", False, _TBool),
    ("debug_simulate_errors", False, _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
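
  # Illustrative example (not part of the original module): the same error
  # reported through _Error in both formats, for a hypothetical node "node1":
  #
  #   with error_codes: "ERROR:ENODELVM:node:node1:unable to check volume
  #                      groups"
  #   without:          "ERROR: node node1: unable to check volume groups"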

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # file missing on the node
      test1 = file_name not in remote_cksum
      # file present but with a wrong checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # file present with the correct checksum
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1947 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1948 """Verifies and updates the node instance list.
1950 If the listing was successful, then updates this node's instance
1951 list. Otherwise, it marks the RPC call as failed for the instance
1954 @type ninfo: L{objects.Node}
1955 @param ninfo: the node to check
1956 @param nresult: the remote results for the node
1957 @param nimg: the node image object
1960 idata = nresult.get(constants.NV_INSTANCELIST, None)
1961 test = not isinstance(idata, list)
1962 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1963 " (instancelist): %s", utils.SafeEncode(str(idata)))
1964 if test:
1965 nimg.hyp_fail = True
1966 else:
1967 nimg.instances = idata
1969 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1970 """Verifies and computes a node information map
1972 @type ninfo: L{objects.Node}
1973 @param ninfo: the node to check
1974 @param nresult: the remote results for the node
1975 @param nimg: the node image object
1976 @param vg_name: the configured VG name
1980 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1981 node = ninfo.name
1982 # try to read free memory (from the hypervisor)
1983 hv_info = nresult.get(constants.NV_HVINFO, None)
1984 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1985 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1986 if not test:
1987 try:
1988 nimg.mfree = int(hv_info["memory_free"])
1989 except (ValueError, TypeError):
1990 _ErrorIf(True, self.ENODERPC, node,
1991 "node returned invalid nodeinfo, check hypervisor")
1993 # FIXME: devise a free space model for file based instances as well
1994 if vg_name is not None:
1995 test = (constants.NV_VGLIST not in nresult or
1996 vg_name not in nresult[constants.NV_VGLIST])
1997 _ErrorIf(test, self.ENODELVM, node,
1998 "node didn't return data for the volume group '%s'"
1999 " - it is either missing or broken", vg_name)
2000 if not test:
2001 try:
2002 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2003 except (ValueError, TypeError):
2004 _ErrorIf(True, self.ENODERPC, node,
2005 "node returned invalid LVM info, check LVM status")
2007 def BuildHooksEnv(self):
2010 Cluster-Verify hooks are run only in the post phase; if they fail, their
2011 output is logged in the verify output and the verification fails.
2014 all_nodes = self.cfg.GetNodeList()
2016 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2018 for node in self.cfg.GetAllNodesInfo().values():
2019 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2021 return env, [], all_nodes
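# Illustrative note (added; tag values are hypothetical): the environment
# built above looks like
#   {"CLUSTER_TAGS": "prod web", "NODE_TAGS_node1": "drbd ssd"}
# i.e. the cluster tags plus one space-joined tag string per node.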
2023 def Exec(self, feedback_fn):
2024 """Verify integrity of cluster, performing various test on nodes.
2028 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2029 verbose = self.op.verbose
2030 self._feedback_fn = feedback_fn
2031 feedback_fn("* Verifying global settings")
2032 for msg in self.cfg.VerifyConfig():
2033 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2035 # Check the cluster certificates
2036 for cert_filename in constants.ALL_CERT_FILES:
2037 (errcode, msg) = _VerifyCertificate(cert_filename)
2038 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2040 vg_name = self.cfg.GetVGName()
2041 drbd_helper = self.cfg.GetDRBDHelper()
2042 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2043 cluster = self.cfg.GetClusterInfo()
2044 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2045 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2046 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2047 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2048 for iname in instancelist)
2049 i_non_redundant = [] # Non redundant instances
2050 i_non_a_balanced = [] # Non auto-balanced instances
2051 n_offline = 0 # Count of offline nodes
2052 n_drained = 0 # Count of nodes being drained
2053 node_vol_should = {}
2055 # FIXME: verify OS list
2056 # do local checksums
2057 master_files = [constants.CLUSTER_CONF_FILE]
2058 master_node = self.master_node = self.cfg.GetMasterNode()
2059 master_ip = self.cfg.GetMasterIP()
2061 file_names = ssconf.SimpleStore().GetFileList()
2062 file_names.extend(constants.ALL_CERT_FILES)
2063 file_names.extend(master_files)
2064 if cluster.modify_etc_hosts:
2065 file_names.append(constants.ETC_HOSTS)
2067 local_checksums = utils.FingerprintFiles(file_names)
2069 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2070 node_verify_param = {
2071 constants.NV_FILELIST: file_names,
2072 constants.NV_NODELIST: [node.name for node in nodeinfo
2073 if not node.offline],
2074 constants.NV_HYPERVISOR: hypervisors,
2075 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2076 node.secondary_ip) for node in nodeinfo
2077 if not node.offline],
2078 constants.NV_INSTANCELIST: hypervisors,
2079 constants.NV_VERSION: None,
2080 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2081 constants.NV_NODESETUP: None,
2082 constants.NV_TIME: None,
2083 constants.NV_MASTERIP: (master_node, master_ip),
2084 constants.NV_OSLIST: None,
2085 }
2087 if vg_name is not None:
2088 node_verify_param[constants.NV_VGLIST] = None
2089 node_verify_param[constants.NV_LVLIST] = vg_name
2090 node_verify_param[constants.NV_PVLIST] = [vg_name]
2091 node_verify_param[constants.NV_DRBDLIST] = None
2093 if drbd_helper:
2094 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2096 # Build our expected cluster state
2097 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2098 name=node.name))
2099 for node in nodeinfo)
2101 for instance in instancelist:
2102 inst_config = instanceinfo[instance]
2104 for nname in inst_config.all_nodes:
2105 if nname not in node_image:
2106 # ghost node
2107 gnode = self.NodeImage(name=nname)
2108 gnode.ghost = True
2109 node_image[nname] = gnode
2111 inst_config.MapLVsByNode(node_vol_should)
2113 pnode = inst_config.primary_node
2114 node_image[pnode].pinst.append(instance)
2116 for snode in inst_config.secondary_nodes:
2117 nimg = node_image[snode]
2118 nimg.sinst.append(instance)
2119 if pnode not in nimg.sbp:
2120 nimg.sbp[pnode] = []
2121 nimg.sbp[pnode].append(instance)
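# Illustrative note (added; instance/node names are hypothetical): for an
# instance "inst1" with primary "node1" and secondary "node2", this loop
# yields
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp == {"node1": ["inst1"]}
# i.e. sbp groups a secondary node's instances by their primary node.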
2123 # At this point, we have the in-memory data structures complete,
2124 # except for the runtime information, which we'll gather next
2126 # Due to the way our RPC system works, exact response times cannot be
2127 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2128 # time before and after executing the request, we can at least have a time
2130 nvinfo_starttime = time.time()
2131 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2132 self.cfg.GetClusterName())
2133 nvinfo_endtime = time.time()
2135 all_drbd_map = self.cfg.ComputeDRBDMap()
2137 feedback_fn("* Verifying node status")
2139 refos_img = None
2141 for node_i in nodeinfo:
2142 node = node_i.name
2143 nimg = node_image[node]
2147 feedback_fn("* Skipping offline node %s" % (node,))
2151 if node == master_node:
2152 ntype = "master"
2153 elif node_i.master_candidate:
2154 ntype = "master candidate"
2155 elif node_i.drained:
2156 ntype = "drained"
2157 n_drained += 1
2158 else:
2159 ntype = "regular"
2160 if verbose:
2161 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2163 msg = all_nvinfo[node].fail_msg
2164 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2165 if msg:
2166 nimg.rpc_fail = True
2167 continue
2169 nresult = all_nvinfo[node].payload
2171 nimg.call_ok = self._VerifyNode(node_i, nresult)
2172 self._VerifyNodeNetwork(node_i, nresult)
2173 self._VerifyNodeLVM(node_i, nresult, vg_name)
2174 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2175 master_files)
2176 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2177 all_drbd_map)
2178 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2180 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2181 self._UpdateNodeInstances(node_i, nresult, nimg)
2182 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2183 self._UpdateNodeOS(node_i, nresult, nimg)
2184 if not nimg.os_fail:
2185 if refos_img is None:
2186 refos_img = nimg
2187 self._VerifyNodeOS(node_i, nimg, refos_img)
2189 feedback_fn("* Verifying instance status")
2190 for instance in instancelist:
2191 if verbose:
2192 feedback_fn("* Verifying instance %s" % instance)
2193 inst_config = instanceinfo[instance]
2194 self._VerifyInstance(instance, inst_config, node_image)
2195 inst_nodes_offline = []
2197 pnode = inst_config.primary_node
2198 pnode_img = node_image[pnode]
2199 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2200 self.ENODERPC, pnode, "instance %s, connection to"
2201 " primary node failed", instance)
2203 if pnode_img.offline:
2204 inst_nodes_offline.append(pnode)
2206 # If the instance is non-redundant we cannot survive losing its primary
2207 # node, so we are not N+1 compliant. On the other hand we have no disk
2208 # templates with more than one secondary so that situation is not well
2209 # supported either.
2210 # FIXME: does not support file-backed instances
2211 if not inst_config.secondary_nodes:
2212 i_non_redundant.append(instance)
2213 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2214 instance, "instance has multiple secondary nodes: %s",
2215 utils.CommaJoin(inst_config.secondary_nodes),
2216 code=self.ETYPE_WARNING)
2218 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2219 i_non_a_balanced.append(instance)
2221 for snode in inst_config.secondary_nodes:
2222 s_img = node_image[snode]
2223 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2224 "instance %s, connection to secondary node failed", instance)
2226 if s_img.offline:
2227 inst_nodes_offline.append(snode)
2229 # warn that the instance lives on offline nodes
2230 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2231 "instance lives on offline node(s) %s",
2232 utils.CommaJoin(inst_nodes_offline))
2233 # ... or ghost nodes
2234 for node in inst_config.all_nodes:
2235 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2236 "instance lives on ghost node %s", node)
2238 feedback_fn("* Verifying orphan volumes")
2239 reserved = utils.FieldSet(*cluster.reserved_lvs)
2240 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2242 feedback_fn("* Verifying orphan instances")
2243 self._VerifyOrphanInstances(instancelist, node_image)
2245 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2246 feedback_fn("* Verifying N+1 Memory redundancy")
2247 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2249 feedback_fn("* Other Notes")
2250 if i_non_redundant:
2251 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2252 % len(i_non_redundant))
2254 if i_non_a_balanced:
2255 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2256 % len(i_non_a_balanced))
2258 if n_offline:
2259 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2261 if n_drained:
2262 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2264 return not self.bad
2266 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2267 """Analyze the post-hooks' result
2269 This method analyses the hook result, handles it, and sends some
2270 nicely-formatted feedback back to the user.
2272 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2273 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2274 @param hooks_results: the results of the multi-node hooks rpc call
2275 @param feedback_fn: function used to send feedback back to the caller
2276 @param lu_result: previous Exec result
2277 @return: the new Exec result, based on the previous result
2281 # We only really run POST phase hooks, and are only interested in
2282 # their results
2283 if phase == constants.HOOKS_PHASE_POST:
2284 # Used to change hooks' output to proper indentation
2285 indent_re = re.compile('^', re.M)
2286 feedback_fn("* Hooks Results")
2287 assert hooks_results, "invalid result from hooks"
2289 for node_name in hooks_results:
2290 res = hooks_results[node_name]
2291 msg = res.fail_msg
2292 test = msg and not res.offline
2293 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2294 "Communication failure in hooks execution: %s", msg)
2295 if res.offline or msg:
2296 # No need to investigate payload if node is offline or gave an error.
2297 # override manually lu_result here as _ErrorIf only
2298 # overrides self.bad
2299 lu_result = 1
2300 continue
2301 for script, hkr, output in res.payload:
2302 test = hkr == constants.HKR_FAIL
2303 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2304 "Script %s failed, output:", script)
2305 if test:
2306 output = indent_re.sub(' ', output)
2307 feedback_fn("%s" % output)
2308 lu_result = 0
2310 return lu_result
2313 class LUVerifyDisks(NoHooksLU):
2314 """Verifies the cluster disks status.
2319 def ExpandNames(self):
2320 self.needed_locks = {
2321 locking.LEVEL_NODE: locking.ALL_SET,
2322 locking.LEVEL_INSTANCE: locking.ALL_SET,
2324 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2326 def Exec(self, feedback_fn):
2327 """Verify integrity of cluster disks.
2329 @rtype: tuple of three items
2330 @return: a tuple of (dict of node-to-node_error, list of instances
2331 which need activate-disks, dict of instance: (node, volume) for
2332 missing volumes
2335 result = res_nodes, res_instances, res_missing = {}, [], {}
2337 vg_name = self.cfg.GetVGName()
2338 nodes = utils.NiceSort(self.cfg.GetNodeList())
2339 instances = [self.cfg.GetInstanceInfo(name)
2340 for name in self.cfg.GetInstanceList()]
2342 nv_dict = {}
2343 for inst in instances:
2344 inst_lvs = {}
2345 if (not inst.admin_up or
2346 inst.disk_template not in constants.DTS_NET_MIRROR):
2347 continue
2348 inst.MapLVsByNode(inst_lvs)
2349 # transform each instance's {node: [vol, ...]} map into {(node, vol): inst}
2350 for node, vol_list in inst_lvs.iteritems():
2351 for vol in vol_list:
2352 nv_dict[(node, vol)] = inst
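# Illustrative sketch (added; data is hypothetical): the loop above
# flattens a per-instance {node: [vol, ...]} map into a reverse
# (node, vol) index:
#
#   >>> inst_lvs = {"node1": ["xenvg/lv0"], "node2": ["xenvg/lv0"]}
#   >>> nv = dict(((node, vol), "inst1")
#   ...           for node, vols in inst_lvs.items() for vol in vols)
#   >>> sorted(nv)
#   [('node1', 'xenvg/lv0'), ('node2', 'xenvg/lv0')]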
2357 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2360 for node in nodes:
2361 node_res = node_lvs[node]
2362 if node_res.offline:
2363 continue
2364 msg = node_res.fail_msg
2365 if msg:
2366 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2367 res_nodes[node] = msg
2368 continue
2370 lvs = node_res.payload
2371 for lv_name, (_, _, lv_online) in lvs.items():
2372 inst = nv_dict.pop((node, lv_name), None)
2373 if (not lv_online and inst is not None
2374 and inst.name not in res_instances):
2375 res_instances.append(inst.name)
2377 # any leftover items in nv_dict are missing LVs, let's arrange the
2378 # data better
2379 for key, inst in nv_dict.iteritems():
2380 if inst.name not in res_missing:
2381 res_missing[inst.name] = []
2382 res_missing[inst.name].append(key)
2384 return result
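# Illustrative note (added; values are hypothetical): the result built
# above then looks like
#   ({"node3": "rpc error"}, ["inst2"], {"inst1": [("node1", "xenvg/lv0")]})
# i.e. per-node errors, instances needing activate-disks, missing LVs.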
2387 class LURepairDiskSizes(NoHooksLU):
2388 """Verifies the cluster disks sizes.
2391 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2394 def ExpandNames(self):
2395 if self.op.instances:
2396 self.wanted_names = []
2397 for name in self.op.instances:
2398 full_name = _ExpandInstanceName(self.cfg, name)
2399 self.wanted_names.append(full_name)
2400 self.needed_locks = {
2401 locking.LEVEL_NODE: [],
2402 locking.LEVEL_INSTANCE: self.wanted_names,
2404 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2405 else:
2406 self.wanted_names = None
2407 self.needed_locks = {
2408 locking.LEVEL_NODE: locking.ALL_SET,
2409 locking.LEVEL_INSTANCE: locking.ALL_SET,
2411 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2413 def DeclareLocks(self, level):
2414 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2415 self._LockInstancesNodes(primary_only=True)
2417 def CheckPrereq(self):
2418 """Check prerequisites.
2420 This only checks the optional instance list against the existing names.
2423 if self.wanted_names is None:
2424 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2426 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2427 in self.wanted_names]
2429 def _EnsureChildSizes(self, disk):
2430 """Ensure children of the disk have the needed disk size.
2432 This is valid mainly for DRBD8 and fixes an issue where the
2433 children have a smaller disk size than the parent.
2435 @param disk: an L{ganeti.objects.Disk} object
2438 if disk.dev_type == constants.LD_DRBD8:
2439 assert disk.children, "Empty children for DRBD8?"
2440 fchild = disk.children[0]
2441 mismatch = fchild.size < disk.size
2442 if mismatch:
2443 self.LogInfo("Child disk has size %d, parent %d, fixing",
2444 fchild.size, disk.size)
2445 fchild.size = disk.size
2447 # and we recurse on this child only, not on the metadev
2448 return self._EnsureChildSizes(fchild) or mismatch
2449 else:
2450 return False
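# Illustrative note (added; sizes are hypothetical): for a DRBD8 disk of
# size 10240 whose data child reports 10200, the helper grows the child to
# 10240 and returns True so the caller knows the configuration changed.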
2452 def Exec(self, feedback_fn):
2453 """Verify the size of cluster disks.
2456 # TODO: check child disks too
2457 # TODO: check differences in size between primary/secondary nodes
2458 per_node_disks = {}
2459 for instance in self.wanted_instances:
2460 pnode = instance.primary_node
2461 if pnode not in per_node_disks:
2462 per_node_disks[pnode] = []
2463 for idx, disk in enumerate(instance.disks):
2464 per_node_disks[pnode].append((instance, idx, disk))
2466 changed = []
2467 for node, dskl in per_node_disks.items():
2468 newl = [v[2].Copy() for v in dskl]
2469 for dsk in newl:
2470 self.cfg.SetDiskID(dsk, node)
2471 result = self.rpc.call_blockdev_getsizes(node, newl)
2472 if result.fail_msg:
2473 self.LogWarning("Failure in blockdev_getsizes call to node"
2474 " %s, ignoring", node)
2475 continue
2476 if len(result.data) != len(dskl):
2477 self.LogWarning("Invalid result from node %s, ignoring node results",
2478 node)
2479 continue
2480 for ((instance, idx, disk), size) in zip(dskl, result.data):
2481 if size is None:
2482 self.LogWarning("Disk %d of instance %s did not return size"
2483 " information, ignoring", idx, instance.name)
2484 continue
2485 if not isinstance(size, (int, long)):
2486 self.LogWarning("Disk %d of instance %s did not return valid"
2487 " size information, ignoring", idx, instance.name)
2488 continue
2489 size = size >> 20
2490 if size != disk.size:
2491 self.LogInfo("Disk %d of instance %s has mismatched size,"
2492 " correcting: recorded %d, actual %d", idx,
2493 instance.name, disk.size, size)
2494 disk.size = size
2495 self.cfg.Update(instance, feedback_fn)
2496 changed.append((instance.name, idx, size))
2497 if self._EnsureChildSizes(disk):
2498 self.cfg.Update(instance, feedback_fn)
2499 changed.append((instance.name, idx, disk.size))
2501 return changed
2503 class LURenameCluster(LogicalUnit):
2504 """Rename the cluster.
2507 HPATH = "cluster-rename"
2508 HTYPE = constants.HTYPE_CLUSTER
2509 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2511 def BuildHooksEnv(self):
2516 "OP_TARGET": self.cfg.GetClusterName(),
2517 "NEW_NAME": self.op.name,
2518 }
2519 mn = self.cfg.GetMasterNode()
2520 all_nodes = self.cfg.GetNodeList()
2521 return env, [mn], all_nodes
2523 def CheckPrereq(self):
2524 """Verify that the passed name is a valid one.
2527 hostname = netutils.GetHostInfo(self.op.name)
2529 new_name = hostname.name
2530 self.ip = new_ip = hostname.ip
2531 old_name = self.cfg.GetClusterName()
2532 old_ip = self.cfg.GetMasterIP()
2533 if new_name == old_name and new_ip == old_ip:
2534 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2535 " cluster has changed",
2537 if new_ip != old_ip:
2538 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2539 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2540 " reachable on the network. Aborting." %
2541 new_ip, errors.ECODE_NOTUNIQUE)
2543 self.op.name = new_name
2545 def Exec(self, feedback_fn):
2546 """Rename the cluster.
2549 clustername = self.op.name
2550 ip = self.ip
2552 # shutdown the master IP
2553 master = self.cfg.GetMasterNode()
2554 result = self.rpc.call_node_stop_master(master, False)
2555 result.Raise("Could not disable the master role")
2557 try:
2558 cluster = self.cfg.GetClusterInfo()
2559 cluster.cluster_name = clustername
2560 cluster.master_ip = ip
2561 self.cfg.Update(cluster, feedback_fn)
2563 # update the known hosts file
2564 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2565 node_list = self.cfg.GetNodeList()
2566 try:
2567 node_list.remove(master)
2568 except ValueError:
2569 pass
2570 result = self.rpc.call_upload_file(node_list,
2571 constants.SSH_KNOWN_HOSTS_FILE)
2572 for to_node, to_result in result.iteritems():
2573 msg = to_result.fail_msg
2574 if msg:
2575 msg = ("Copy of file %s to node %s failed: %s" %
2576 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2577 self.proc.LogWarning(msg)
2579 finally:
2580 result = self.rpc.call_node_start_master(master, False, False)
2581 msg = result.fail_msg
2582 if msg:
2583 self.LogWarning("Could not re-enable the master role on"
2584 " the master, please restart manually: %s", msg)
2587 class LUSetClusterParams(LogicalUnit):
2588 """Change the parameters of the cluster.
2591 HPATH = "cluster-modify"
2592 HTYPE = constants.HTYPE_CLUSTER
2594 ("vg_name", None, _TMaybeString),
2595 ("enabled_hypervisors", None,
2596 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2597 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2598 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2599 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2600 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2601 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2602 ("uid_pool", None, _NoType),
2603 ("add_uids", None, _NoType),
2604 ("remove_uids", None, _NoType),
2605 ("maintain_node_health", None, _TMaybeBool),
2606 ("nicparams", None, _TOr(_TDict, _TNone)),
2607 ("drbd_helper", None, _TOr(_TString, _TNone)),
2608 ("default_iallocator", None, _TMaybeString),
2612 def CheckArguments(self):
2616 if self.op.uid_pool:
2617 uidpool.CheckUidPool(self.op.uid_pool)
2619 if self.op.add_uids:
2620 uidpool.CheckUidPool(self.op.add_uids)
2622 if self.op.remove_uids:
2623 uidpool.CheckUidPool(self.op.remove_uids)
2625 def ExpandNames(self):
2626 # FIXME: in the future maybe other cluster params won't require checking on
2627 # all nodes to be modified.
2628 self.needed_locks = {
2629 locking.LEVEL_NODE: locking.ALL_SET,
2631 self.share_locks[locking.LEVEL_NODE] = 1
2633 def BuildHooksEnv(self):
2638 "OP_TARGET": self.cfg.GetClusterName(),
2639 "NEW_VG_NAME": self.op.vg_name,
2640 }
2641 mn = self.cfg.GetMasterNode()
2642 return env, [mn], [mn]
2644 def CheckPrereq(self):
2645 """Check prerequisites.
2647 This checks whether the given params don't conflict and
2648 if the given volume group is valid.
2651 if self.op.vg_name is not None and not self.op.vg_name:
2652 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2653 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2654 " instances exist", errors.ECODE_INVAL)
2656 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2657 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2658 raise errors.OpPrereqError("Cannot disable drbd helper while"
2659 " drbd-based instances exist",
2662 node_list = self.acquired_locks[locking.LEVEL_NODE]
2664 # if vg_name not None, checks given volume group on all nodes
2665 if self.op.vg_name:
2666 vglist = self.rpc.call_vg_list(node_list)
2667 for node in node_list:
2668 msg = vglist[node].fail_msg
2669 if msg:
2670 # ignoring down node
2671 self.LogWarning("Error while gathering data on node %s"
2672 " (ignoring node): %s", node, msg)
2674 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2675 self.op.vg_name,
2676 constants.MIN_VG_SIZE)
2677 if vgstatus:
2678 raise errors.OpPrereqError("Error on node '%s': %s" %
2679 (node, vgstatus), errors.ECODE_ENVIRON)
2681 if self.op.drbd_helper:
2682 # checks given drbd helper on all nodes
2683 helpers = self.rpc.call_drbd_helper(node_list)
2684 for node in node_list:
2685 ninfo = self.cfg.GetNodeInfo(node)
2686 if ninfo.offline:
2687 self.LogInfo("Not checking drbd helper on offline node %s", node)
2688 continue
2689 msg = helpers[node].fail_msg
2690 if msg:
2691 raise errors.OpPrereqError("Error checking drbd helper on node"
2692 " '%s': %s" % (node, msg),
2693 errors.ECODE_ENVIRON)
2694 node_helper = helpers[node].payload
2695 if node_helper != self.op.drbd_helper:
2696 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2697 (node, node_helper), errors.ECODE_ENVIRON)
2699 self.cluster = cluster = self.cfg.GetClusterInfo()
2700 # validate params changes
2701 if self.op.beparams:
2702 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2703 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2705 if self.op.nicparams:
2706 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2707 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2708 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2710 nic_errors = []
2711 # check all instances for consistency
2712 for instance in self.cfg.GetAllInstancesInfo().values():
2713 for nic_idx, nic in enumerate(instance.nics):
2714 params_copy = copy.deepcopy(nic.nicparams)
2715 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2717 # check parameter syntax
2718 try:
2719 objects.NIC.CheckParameterSyntax(params_filled)
2720 except errors.ConfigurationError, err:
2721 nic_errors.append("Instance %s, nic/%d: %s" %
2722 (instance.name, nic_idx, err))
2724 # if we're moving instances to routed, check that they have an ip
2725 target_mode = params_filled[constants.NIC_MODE]
2726 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2727 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
2728 (instance.name, nic_idx))
2729 if nic_errors:
2730 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2731 "\n".join(nic_errors))
2733 # hypervisor list/parameters
2734 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2735 if self.op.hvparams:
2736 for hv_name, hv_dict in self.op.hvparams.items():
2737 if hv_name not in self.new_hvparams:
2738 self.new_hvparams[hv_name] = hv_dict
2739 else:
2740 self.new_hvparams[hv_name].update(hv_dict)
2742 # os hypervisor parameters
2743 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2744 if self.op.os_hvp:
2745 for os_name, hvs in self.op.os_hvp.items():
2746 if os_name not in self.new_os_hvp:
2747 self.new_os_hvp[os_name] = hvs
2748 else:
2749 for hv_name, hv_dict in hvs.items():
2750 if hv_name not in self.new_os_hvp[os_name]:
2751 self.new_os_hvp[os_name][hv_name] = hv_dict
2752 else:
2753 self.new_os_hvp[os_name][hv_name].update(hv_dict)
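# Illustrative sketch (added; values are hypothetical): the two-level
# merge above only overlays the keys present in the update, keeping
# unrelated settings:
#
#   >>> merged = {"xen-pvm": {"kernel_path": "/boot/k1"}}
#   >>> merged["xen-pvm"].update({"root_path": "/dev/xvda1"})
#   >>> sorted(merged["xen-pvm"])
#   ['kernel_path', 'root_path']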
2756 self.new_osp = objects.FillDict(cluster.osparams, {})
2757 if self.op.osparams:
2758 for os_name, osp in self.op.osparams.items():
2759 if os_name not in self.new_osp:
2760 self.new_osp[os_name] = {}
2762 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2763 use_none=True)
2765 if not self.new_osp[os_name]:
2766 # we removed all parameters
2767 del self.new_osp[os_name]
2768 else:
2769 # check the parameter validity (remote check)
2770 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2771 os_name, self.new_osp[os_name])
2773 # changes to the hypervisor list
2774 if self.op.enabled_hypervisors is not None:
2775 self.hv_list = self.op.enabled_hypervisors
2776 for hv in self.hv_list:
2777 # if the hypervisor doesn't already exist in the cluster
2778 # hvparams, we initialize it to empty, and then (in both
2779 # cases) we make sure to fill the defaults, as we might not
2780 # have a complete defaults list if the hypervisor wasn't
2781 # enabled before
2782 if hv not in new_hvp:
2783 new_hvp[hv] = {}
2784 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2785 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2786 else:
2787 self.hv_list = cluster.enabled_hypervisors
2789 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2790 # either the enabled list has changed, or the parameters have, validate
2791 for hv_name, hv_params in self.new_hvparams.items():
2792 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2793 (self.op.enabled_hypervisors and
2794 hv_name in self.op.enabled_hypervisors)):
2795 # either this is a new hypervisor, or its parameters have changed
2796 hv_class = hypervisor.GetHypervisor(hv_name)
2797 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2798 hv_class.CheckParameterSyntax(hv_params)
2799 _CheckHVParams(self, node_list, hv_name, hv_params)
2801 if self.op.os_hvp:
2802 # no need to check any newly-enabled hypervisors, since the
2803 # defaults have already been checked in the above code-block
2804 for os_name, os_hvp in self.new_os_hvp.items():
2805 for hv_name, hv_params in os_hvp.items():
2806 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2807 # we need to fill in the new os_hvp on top of the actual hv_p
2808 cluster_defaults = self.new_hvparams.get(hv_name, {})
2809 new_osp = objects.FillDict(cluster_defaults, hv_params)
2810 hv_class = hypervisor.GetHypervisor(hv_name)
2811 hv_class.CheckParameterSyntax(new_osp)
2812 _CheckHVParams(self, node_list, hv_name, new_osp)
2814 if self.op.default_iallocator:
2815 alloc_script = utils.FindFile(self.op.default_iallocator,
2816 constants.IALLOCATOR_SEARCH_PATH,
2817 os.path.isfile)
2818 if alloc_script is None:
2819 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2820 " specified" % self.op.default_iallocator,
2823 def Exec(self, feedback_fn):
2824 """Change the parameters of the cluster.
2827 if self.op.vg_name is not None:
2828 new_volume = self.op.vg_name
2829 if not new_volume:
2830 new_volume = None
2831 if new_volume != self.cfg.GetVGName():
2832 self.cfg.SetVGName(new_volume)
2833 else:
2834 feedback_fn("Cluster LVM configuration already in desired"
2835 " state, not changing")
2836 if self.op.drbd_helper is not None:
2837 new_helper = self.op.drbd_helper
2838 if not new_helper:
2839 new_helper = None
2840 if new_helper != self.cfg.GetDRBDHelper():
2841 self.cfg.SetDRBDHelper(new_helper)
2842 else:
2843 feedback_fn("Cluster DRBD helper already in desired state,"
2844 " not changing")
2845 if self.op.hvparams:
2846 self.cluster.hvparams = self.new_hvparams
2847 if self.op.os_hvp:
2848 self.cluster.os_hvp = self.new_os_hvp
2849 if self.op.enabled_hypervisors is not None:
2850 self.cluster.hvparams = self.new_hvparams
2851 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2852 if self.op.beparams:
2853 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2854 if self.op.nicparams:
2855 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2856 if self.op.osparams:
2857 self.cluster.osparams = self.new_osp
2859 if self.op.candidate_pool_size is not None:
2860 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2861 # we need to update the pool size here, otherwise the save will fail
2862 _AdjustCandidatePool(self, [])
2864 if self.op.maintain_node_health is not None:
2865 self.cluster.maintain_node_health = self.op.maintain_node_health
2867 if self.op.add_uids is not None:
2868 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2870 if self.op.remove_uids is not None:
2871 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2873 if self.op.uid_pool is not None:
2874 self.cluster.uid_pool = self.op.uid_pool
2876 if self.op.default_iallocator is not None:
2877 self.cluster.default_iallocator = self.op.default_iallocator
2879 self.cfg.Update(self.cluster, feedback_fn)
2882 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2883 """Distribute additional files which are part of the cluster configuration.
2885 ConfigWriter takes care of distributing the config and ssconf files, but
2886 there are more files which should be distributed to all nodes. This function
2887 makes sure those are copied.
2889 @param lu: calling logical unit
2890 @param additional_nodes: list of nodes not in the config to distribute to
2893 # 1. Gather target nodes
2894 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2895 dist_nodes = lu.cfg.GetOnlineNodeList()
2896 if additional_nodes is not None:
2897 dist_nodes.extend(additional_nodes)
2898 if myself.name in dist_nodes:
2899 dist_nodes.remove(myself.name)
2901 # 2. Gather files to distribute
2902 dist_files = set([constants.ETC_HOSTS,
2903 constants.SSH_KNOWN_HOSTS_FILE,
2904 constants.RAPI_CERT_FILE,
2905 constants.RAPI_USERS_FILE,
2906 constants.CONFD_HMAC_KEY,
2907 constants.CLUSTER_DOMAIN_SECRET_FILE,
2910 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2911 for hv_name in enabled_hypervisors:
2912 hv_class = hypervisor.GetHypervisor(hv_name)
2913 dist_files.update(hv_class.GetAncillaryFiles())
2915 # 3. Perform the files upload
2916 for fname in dist_files:
2917 if os.path.exists(fname):
2918 result = lu.rpc.call_upload_file(dist_nodes, fname)
2919 for to_node, to_result in result.items():
2920 msg = to_result.fail_msg
2922 msg = ("Copy of file %s to node %s failed: %s" %
2923 (fname, to_node, msg))
2924 lu.proc.LogWarning(msg)
2927 class LURedistributeConfig(NoHooksLU):
2928 """Force the redistribution of cluster configuration.
2930 This is a very simple LU.
2935 def ExpandNames(self):
2936 self.needed_locks = {
2937 locking.LEVEL_NODE: locking.ALL_SET,
2939 self.share_locks[locking.LEVEL_NODE] = 1
2941 def Exec(self, feedback_fn):
2942 """Redistribute the configuration.
2945 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2946 _RedistributeAncillaryFiles(self)
2949 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2950 """Sleep and poll for an instance's disk to sync.
2953 if not instance.disks or disks is not None and not disks:
2954 return True
2956 disks = _ExpandCheckDisks(instance, disks)
2959 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2961 node = instance.primary_node
2963 for dev in disks:
2964 lu.cfg.SetDiskID(dev, node)
2966 # TODO: Convert to utils.Retry
2968 retries = 0
2969 degr_retries = 10 # in seconds, as we sleep 1 second each time
2970 while True:
2971 max_time = 0
2972 done = True
2973 cumul_degraded = False
2974 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2975 msg = rstats.fail_msg
2976 if msg:
2977 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2978 retries += 1
2979 if retries >= 10:
2980 raise errors.RemoteError("Can't contact node %s for mirror data,"
2981 " aborting." % node)
2982 time.sleep(6)
2983 continue
2984 rstats = rstats.payload
2985 retries = 0
2986 for i, mstat in enumerate(rstats):
2987 if mstat is None:
2988 lu.LogWarning("Can't compute data for node %s/%s",
2989 node, disks[i].iv_name)
2990 continue
2992 cumul_degraded = (cumul_degraded or
2993 (mstat.is_degraded and mstat.sync_percent is None))
2994 if mstat.sync_percent is not None:
2995 done = False
2996 if mstat.estimated_time is not None:
2997 rem_time = ("%s remaining (estimated)" %
2998 utils.FormatSeconds(mstat.estimated_time))
2999 max_time = mstat.estimated_time
3000 else:
3001 rem_time = "no time estimate"
3002 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3003 (disks[i].iv_name, mstat.sync_percent, rem_time))
3005 # if we're done but degraded, let's do a few small retries, to
3006 # make sure we see a stable and not transient situation; therefore
3007 # we force restart of the loop
3008 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3009 logging.info("Degraded disks found, %d retries left", degr_retries)
3010 degr_retries -= 1
3011 time.sleep(1)
3012 continue
3014 if done or oneshot:
3015 break
3017 time.sleep(min(60, max_time))
3020 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3021 return not cumul_degraded
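# Note (added): the degraded-retry logic above terminates because every
# "done but degraded" pass decrements degr_retries, so a transiently
# degraded mirror gets at most ten one-second re-checks before the loop
# accepts the degraded state and exits.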
3024 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3025 """Check that mirrors are not degraded.
3027 The ldisk parameter, if True, will change the test from the
3028 is_degraded attribute (which represents overall non-ok status for
3029 the device(s)) to the ldisk (representing the local storage status).
3032 lu.cfg.SetDiskID(dev, node)
3034 result = True
3036 if on_primary or dev.AssembleOnSecondary():
3037 rstats = lu.rpc.call_blockdev_find(node, dev)
3038 msg = rstats.fail_msg
3040 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3041 result = False
3042 elif not rstats.payload:
3043 lu.LogWarning("Can't find disk on node %s", node)
3044 result = False
3045 else:
3046 if ldisk:
3047 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3048 else:
3049 result = result and not rstats.payload.is_degraded
3051 if dev.children:
3052 for child in dev.children:
3053 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3055 return result
3058 class LUDiagnoseOS(NoHooksLU):
3059 """Logical unit for OS diagnose/query.
3064 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3067 _FIELDS_STATIC = utils.FieldSet()
3068 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3069 "parameters", "api_versions")
3071 def CheckArguments(self):
3072 if self.op.names:
3073 raise errors.OpPrereqError("Selective OS query not supported",
3076 _CheckOutputFields(static=self._FIELDS_STATIC,
3077 dynamic=self._FIELDS_DYNAMIC,
3078 selected=self.op.output_fields)
3080 def ExpandNames(self):
3081 # Lock all nodes, in shared mode
3082 # Temporary removal of locks, should be reverted later
3083 # TODO: reintroduce locks when they are lighter-weight
3084 self.needed_locks = {}
3085 #self.share_locks[locking.LEVEL_NODE] = 1
3086 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3089 def _DiagnoseByOS(rlist):
3090 """Remaps a per-node return list into an a per-os per-node dictionary
3092 @param rlist: a map with node names as keys and OS objects as values
3095 @return: a dictionary with osnames as keys and as value another
3096 map, with nodes as keys and tuples of (path, status, diagnose,
3097 variants, parameters, api_versions) as values, eg::
3099 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3100 (/srv/..., False, "invalid api")],
3101 "node2": [(/srv/..., True, "", [], [])]}
3102 }
3105 all_os = {}
3106 # we build here the list of nodes that didn't fail the RPC (at RPC
3107 # level), so that nodes with a non-responding node daemon don't
3108 # make all OSes invalid
3109 good_nodes = [node_name for node_name in rlist
3110 if not rlist[node_name].fail_msg]
3111 for node_name, nr in rlist.items():
3112 if nr.fail_msg or not nr.payload:
3113 continue
3114 for (name, path, status, diagnose, variants,
3115 params, api_versions) in nr.payload:
3116 if name not in all_os:
3117 # build a list of nodes for this os containing empty lists
3118 # for each node in node_list
3119 all_os[name] = {}
3120 for nname in good_nodes:
3121 all_os[name][nname] = []
3122 # convert params from [name, help] to (name, help)
3123 params = [tuple(v) for v in params]
3124 all_os[name][node_name].append((path, status, diagnose,
3125 variants, params, api_versions))
3126 return all_os
3128 def Exec(self, feedback_fn):
3129 """Compute the list of OSes.
3132 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3133 node_data = self.rpc.call_os_diagnose(valid_nodes)
3134 pol = self._DiagnoseByOS(node_data)
3135 output = []
3137 for os_name, os_data in pol.items():
3138 row = []
3139 valid = True
3140 (variants, params, api_versions) = null_state = (set(), set(), set())
3141 for idx, osl in enumerate(os_data.values()):
3142 valid = bool(valid and osl and osl[0][1])
3143 if not valid:
3144 (variants, params, api_versions) = null_state
3145 break
3146 node_variants, node_params, node_api = osl[0][3:6]
3147 if idx == 0: # first entry
3148 variants = set(node_variants)
3149 params = set(node_params)
3150 api_versions = set(node_api)
3151 else: # keep consistency
3152 variants.intersection_update(node_variants)
3153 params.intersection_update(node_params)
3154 api_versions.intersection_update(node_api)
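# Illustrative sketch (added; variant names are hypothetical): the
# cross-node consistency pass keeps only what every node agrees on:
#
#   >>> variants = set(["default", "testing"])
#   >>> variants.intersection_update(["default"])
#   >>> sorted(variants)
#   ['default']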
3156 for field in self.op.output_fields:
3157 if field == "name":
3158 val = os_name
3159 elif field == "valid":
3161 elif field == "node_status":
3162 # this is just a copy of the dict
3163 val = {}
3164 for node_name, nos_list in os_data.items():
3165 val[node_name] = nos_list
3166 elif field == "variants":
3167 val = list(variants)
3168 elif field == "parameters":
3169 val = list(params)
3170 elif field == "api_versions":
3171 val = list(api_versions)
3172 else:
3173 raise errors.ParameterError(field)
3174 row.append(val)
3176 output.append(row)
3178 return output
3180 class LURemoveNode(LogicalUnit):
3181 """Logical unit for removing a node.
3184 HPATH = "node-remove"
3185 HTYPE = constants.HTYPE_NODE
3186 _OP_PARAMS = [
3187 ("node_name", _NoDefault, _TNonEmptyString),
3188 ]
3190 def BuildHooksEnv(self):
3193 This doesn't run on the target node in the pre phase as a failed
3194 node would then be impossible to remove.
3198 "OP_TARGET": self.op.node_name,
3199 "NODE_NAME": self.op.node_name,
3200 }
3201 all_nodes = self.cfg.GetNodeList()
3202 try:
3203 all_nodes.remove(self.op.node_name)
3204 except ValueError:
3205 logging.warning("Node %s which is about to be removed not found"
3206 " in the all nodes list", self.op.node_name)
3207 return env, all_nodes, all_nodes
3209 def CheckPrereq(self):
3210 """Check prerequisites.
3213 - the node exists in the configuration
3214 - it does not have primary or secondary instances
3215 - it's not the master
3217 Any errors are signaled by raising errors.OpPrereqError.
3220 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3221 node = self.cfg.GetNodeInfo(self.op.node_name)
3222 assert node is not None
3224 instance_list = self.cfg.GetInstanceList()
3226 masternode = self.cfg.GetMasterNode()
3227 if node.name == masternode:
3228 raise errors.OpPrereqError("Node is the master node,"
3229 " you need to failover first.",
3232 for instance_name in instance_list:
3233 instance = self.cfg.GetInstanceInfo(instance_name)
3234 if node.name in instance.all_nodes:
3235 raise errors.OpPrereqError("Instance %s is still running on the node,"
3236 " please remove first." % instance_name,
3238 self.op.node_name = node.name
3239 self.node = node
3241 def Exec(self, feedback_fn):
3242 """Removes the node from the cluster.
3245 node = self.node
3246 logging.info("Stopping the node daemon and removing configs from node %s",
3247 node.name)
3249 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3251 # Promote nodes to master candidate as needed
3252 _AdjustCandidatePool(self, exceptions=[node.name])
3253 self.context.RemoveNode(node.name)
3255 # Run post hooks on the node before it's removed
3256 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3257 try:
3258 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3259 except:
3260 # pylint: disable-msg=W0702
3261 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3263 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3264 msg = result.fail_msg
3265 if msg:
3266 self.LogWarning("Errors encountered on the remote node while leaving"
3267 " the cluster: %s", msg)
3269 # Remove node from our /etc/hosts
3270 if self.cfg.GetClusterInfo().modify_etc_hosts:
3271 # FIXME: this should be done via an rpc call to node daemon
3272 utils.RemoveHostFromEtcHosts(node.name)
3273 _RedistributeAncillaryFiles(self)
3276 class LUQueryNodes(NoHooksLU):
3277 """Logical unit for querying nodes.
3280 # pylint: disable-msg=W0142
3283 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3284 ("use_locking", False, _TBool),
3288 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3289 "master_candidate", "offline", "drained"]
3291 _FIELDS_DYNAMIC = utils.FieldSet(
3293 "mtotal", "mnode", "mfree",
3295 "ctotal", "cnodes", "csockets",
3298 _FIELDS_STATIC = utils.FieldSet(*[
3299 "pinst_cnt", "sinst_cnt",
3300 "pinst_list", "sinst_list",
3301 "pip", "sip", "tags",
3303 "role"] + _SIMPLE_FIELDS
3306 def CheckArguments(self):
3307 _CheckOutputFields(static=self._FIELDS_STATIC,
3308 dynamic=self._FIELDS_DYNAMIC,
3309 selected=self.op.output_fields)
3311 def ExpandNames(self):
3312 self.needed_locks = {}
3313 self.share_locks[locking.LEVEL_NODE] = 1
3315 if self.op.names:
3316 self.wanted = _GetWantedNodes(self, self.op.names)
3317 else:
3318 self.wanted = locking.ALL_SET
3320 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3321 self.do_locking = self.do_node_query and self.op.use_locking
3322 if self.do_locking:
3323 # if we don't request only static fields, we need to lock the nodes
3324 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3326 def Exec(self, feedback_fn):
3327 """Computes the list of nodes and their attributes.
3330 all_info = self.cfg.GetAllNodesInfo()
3331 if self.do_locking:
3332 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3333 elif self.wanted != locking.ALL_SET:
3334 nodenames = self.wanted
3335 missing = set(nodenames).difference(all_info.keys())
3336 if missing:
3337 raise errors.OpExecError(
3338 "Some nodes were removed before retrieving their data: %s" % missing)
3339 else:
3340 nodenames = all_info.keys()
3342 nodenames = utils.NiceSort(nodenames)
3343 nodelist = [all_info[name] for name in nodenames]
3345 # begin data gathering
3347 if self.do_node_query:
3348 live_data = {}
3349 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3350 self.cfg.GetHypervisorType())
3351 for name in nodenames:
3352 nodeinfo = node_data[name]
3353 if not nodeinfo.fail_msg and nodeinfo.payload:
3354 nodeinfo = nodeinfo.payload
3355 fn = utils.TryConvert
3357 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3358 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3359 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3360 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3361 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3362 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3363 "bootid": nodeinfo.get('bootid', None),
3364 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3365 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3366 }
3367 else:
3368 live_data[name] = {}
3369 else:
3370 live_data = dict.fromkeys(nodenames, {})
3372 node_to_primary = dict([(name, set()) for name in nodenames])
3373 node_to_secondary = dict([(name, set()) for name in nodenames])
3375 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3376 "sinst_cnt", "sinst_list"))
3377 if inst_fields & frozenset(self.op.output_fields):
3378 inst_data = self.cfg.GetAllInstancesInfo()
3380 for inst in inst_data.values():
3381 if inst.primary_node in node_to_primary:
3382 node_to_primary[inst.primary_node].add(inst.name)
3383 for secnode in inst.secondary_nodes:
3384 if secnode in node_to_secondary:
3385 node_to_secondary[secnode].add(inst.name)
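# Illustrative note (added; names are hypothetical): for an instance
# "inst1" with primary "node1" and secondary "node2", the maps above are
#   node_to_primary == {"node1": set(["inst1"]), "node2": set()}
#   node_to_secondary == {"node1": set(), "node2": set(["inst1"])}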
3387 master_node = self.cfg.GetMasterNode()
3389 # end data gathering
3392 for node in nodelist:
3393 node_output = []
3394 for field in self.op.output_fields:
3395 if field in self._SIMPLE_FIELDS:
3396 val = getattr(node, field)
3397 elif field == "pinst_list":
3398 val = list(node_to_primary[node.name])
3399 elif field == "sinst_list":
3400 val = list(node_to_secondary[node.name])
3401 elif field == "pinst_cnt":
3402 val = len(node_to_primary[node.name])
3403 elif field == "sinst_cnt":
3404 val = len(node_to_secondary[node.name])
3405 elif field == "pip":
3406 val = node.primary_ip
3407 elif field == "sip":
3408 val = node.secondary_ip
3409 elif field == "tags":
3410 val = list(node.GetTags())
3411 elif field == "master":
3412 val = node.name == master_node
3413 elif self._FIELDS_DYNAMIC.Matches(field):
3414 val = live_data[node.name].get(field, None)
3415 elif field == "role":
3416 if node.name == master_node:
3417 val = "master"
3418 elif node.master_candidate:
3419 val = "candidate"
3420 elif node.drained:
3421 val = "drained"
3422 elif node.offline:
3423 val = "offline"
3424 else:
3425 val = "regular"
3426 else:
3427 raise errors.ParameterError(field)
3428 node_output.append(val)
3429 output.append(node_output)
3431 return output
3434 class LUQueryNodeVolumes(NoHooksLU):
3435 """Logical unit for getting volumes on node(s).
3439 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3440 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3443 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3444 _FIELDS_STATIC = utils.FieldSet("node")
3446 def CheckArguments(self):
3447 _CheckOutputFields(static=self._FIELDS_STATIC,
3448 dynamic=self._FIELDS_DYNAMIC,
3449 selected=self.op.output_fields)
3451 def ExpandNames(self):
3452 self.needed_locks = {}
3453 self.share_locks[locking.LEVEL_NODE] = 1
3454 if not self.op.nodes:
3455 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3457 self.needed_locks[locking.LEVEL_NODE] = \
3458 _GetWantedNodes(self, self.op.nodes)
3460 def Exec(self, feedback_fn):
3461 """Computes the list of nodes and their attributes.
3464 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3465 volumes = self.rpc.call_node_volumes(nodenames)
3467 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3468 in self.cfg.GetInstanceList()]
3470 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3473 for node in nodenames:
3474 nresult = volumes[node]
3477 msg = nresult.fail_msg
3478 if msg:
3479 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3480 continue
3482 node_vols = nresult.payload[:]
3483 node_vols.sort(key=lambda vol: vol['dev'])
3485 for vol in node_vols:
3486 node_output = []
3487 for field in self.op.output_fields:
3490 elif field == "phys":
3494 elif field == "name":
3496 elif field == "size":
3497 val = int(float(vol['size']))
3498 elif field == "instance":
3499 for inst in ilist:
3500 if node not in lv_by_node[inst]:
3501 continue
3502 if vol['name'] in lv_by_node[inst][node]:
3503 val = inst.name
3504 break
3505 else:
3506 val = '-'
3507 else:
3508 raise errors.ParameterError(field)
3509 node_output.append(str(val))
3511 output.append(node_output)
3513 return output
3516 class LUQueryNodeStorage(NoHooksLU):
3517 """Logical unit for getting information on storage units on node(s).
3520 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3522 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3523 ("storage_type", _NoDefault, _CheckStorageType),
3524 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3525 ("name", None, _TMaybeString),
3529 def CheckArguments(self):
3530 _CheckOutputFields(static=self._FIELDS_STATIC,
3531 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3532 selected=self.op.output_fields)
3534 def ExpandNames(self):
3535 self.needed_locks = {}
3536 self.share_locks[locking.LEVEL_NODE] = 1
3538 if self.op.nodes:
3539 self.needed_locks[locking.LEVEL_NODE] = \
3540 _GetWantedNodes(self, self.op.nodes)
3541 else:
3542 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3544 def Exec(self, feedback_fn):
3545 """Computes the list of nodes and their attributes.
3548 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3550 # Always get name to sort by
3551 if constants.SF_NAME in self.op.output_fields:
3552 fields = self.op.output_fields[:]
3553 else:
3554 fields = [constants.SF_NAME] + self.op.output_fields
3556 # Never ask for node or type as it's only known to the LU
3557 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3558 while extra in fields:
3559 fields.remove(extra)
3561 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3562 name_idx = field_idx[constants.SF_NAME]
3564 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3565 data = self.rpc.call_storage_list(self.nodes,
3566 self.op.storage_type, st_args,
3567 self.op.name, fields)
3571 for node in utils.NiceSort(self.nodes):
3572 nresult = data[node]
3573 if nresult.offline:
3574 continue
3576 msg = nresult.fail_msg
3577 if msg:
3578 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3579 continue
3581 rows = dict([(row[name_idx], row) for row in nresult.payload])
3583 for name in utils.NiceSort(rows.keys()):
3584 row = rows[name]
3586 out = []
3588 for field in self.op.output_fields:
3589 if field == constants.SF_NODE:
3590 val = node
3591 elif field == constants.SF_TYPE:
3592 val = self.op.storage_type
3593 elif field in field_idx:
3594 val = row[field_idx[field]]
3595 else:
3596 raise errors.ParameterError(field)
3598 out.append(val)
3605 class LUModifyNodeStorage(NoHooksLU):
3606 """Logical unit for modifying a storage volume on a node.
3611 ("storage_type", _NoDefault, _CheckStorageType),
3612 ("name", _NoDefault, _TNonEmptyString),
3613 ("changes", _NoDefault, _TDict),
3617 def CheckArguments(self):
3618 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3620 storage_type = self.op.storage_type
3622 try:
3623 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3624 except KeyError:
3625 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3626 " modified" % storage_type,
3629 diff = set(self.op.changes.keys()) - modifiable
3630 if diff:
3631 raise errors.OpPrereqError("The following fields can not be modified for"
3632 " storage units of type '%s': %r" %
3633 (storage_type, list(diff)),
3636 def ExpandNames(self):
3637 self.needed_locks = {
3638 locking.LEVEL_NODE: self.op.node_name,
3641 def Exec(self, feedback_fn):
3642 """Computes the list of nodes and their attributes.
3645 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3646 result = self.rpc.call_storage_modify(self.op.node_name,
3647 self.op.storage_type, st_args,
3648 self.op.name, self.op.changes)
3649 result.Raise("Failed to modify storage unit '%s' on %s" %
3650 (self.op.name, self.op.node_name))
3653 class LUAddNode(LogicalUnit):
3654 """Logical unit for adding node to the cluster.
3657 HPATH = "node-add"
3658 HTYPE = constants.HTYPE_NODE
3661 ("primary_ip", None, _NoType),
3662 ("secondary_ip", None, _TMaybeString),
3663 ("readd", False, _TBool),
3666 def CheckArguments(self):
3667 # validate/normalize the node name
3668 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3670 def BuildHooksEnv(self):
3673 This will run on all nodes before, and on all nodes + the new node after.
3677 "OP_TARGET": self.op.node_name,
3678 "NODE_NAME": self.op.node_name,
3679 "NODE_PIP": self.op.primary_ip,
3680 "NODE_SIP": self.op.secondary_ip,
3681 }
3682 nodes_0 = self.cfg.GetNodeList()
3683 nodes_1 = nodes_0 + [self.op.node_name, ]
3684 return env, nodes_0, nodes_1
3686 def CheckPrereq(self):
3687 """Check prerequisites.
3690 - the new node is not already in the config
3692 - its parameters (single/dual homed) match the cluster
3694 Any errors are signaled by raising errors.OpPrereqError.
3697 node_name = self.op.node_name
3698 cfg = self.cfg
3700 dns_data = netutils.GetHostInfo(node_name)
3702 node = dns_data.name
3703 primary_ip = self.op.primary_ip = dns_data.ip
3704 if self.op.secondary_ip is None:
3705 self.op.secondary_ip = primary_ip
3706 if not netutils.IsValidIP4(self.op.secondary_ip):
3707 raise errors.OpPrereqError("Invalid secondary IP given",
3709 secondary_ip = self.op.secondary_ip
3711 node_list = cfg.GetNodeList()
3712 if not self.op.readd and node in node_list:
3713 raise errors.OpPrereqError("Node %s is already in the configuration" %
3714 node, errors.ECODE_EXISTS)
3715 elif self.op.readd and node not in node_list:
3716 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3719 self.changed_primary_ip = False
3721 for existing_node_name in node_list:
3722 existing_node = cfg.GetNodeInfo(existing_node_name)
3724 if self.op.readd and node == existing_node_name:
3725 if existing_node.secondary_ip != secondary_ip:
3726 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3727 " address configuration as before",
3729 if existing_node.primary_ip != primary_ip:
3730 self.changed_primary_ip = True
3732 continue
3734 if (existing_node.primary_ip == primary_ip or
3735 existing_node.secondary_ip == primary_ip or
3736 existing_node.primary_ip == secondary_ip or
3737 existing_node.secondary_ip == secondary_ip):
3738 raise errors.OpPrereqError("New node ip address(es) conflict with"
3739 " existing node %s" % existing_node.name,
3740 errors.ECODE_NOTUNIQUE)
3742 # check that the type of the node (single versus dual homed) is the
3743 # same as for the master
3744 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3745 master_singlehomed = myself.secondary_ip == myself.primary_ip
3746 newbie_singlehomed = secondary_ip == primary_ip
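# Note (added; the example address is hypothetical): "single-homed" means
# the node's secondary IP equals its primary IP, e.g.
# primary_ip == secondary_ip == "192.0.2.10"; the check below requires the
# new node to use the same homing mode as the master.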
3747 if master_singlehomed != newbie_singlehomed:
3748 if master_singlehomed:
3749 raise errors.OpPrereqError("The master has no private ip but the"
3750 " new node has one",
3753 raise errors.OpPrereqError("The master has a private ip but the"
3754 " new node doesn't have one",
3757 # checks reachability
3758 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3759 raise errors.OpPrereqError("Node not reachable by ping",
3760 errors.ECODE_ENVIRON)
3762 if not newbie_singlehomed:
3763 # check reachability from my secondary ip to newbie's secondary ip
3764 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3765 source=myself.secondary_ip):
3766 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3767 " based ping to noded port",
3768 errors.ECODE_ENVIRON)
3770 if self.op.readd:
3771 exceptions = [node]
3772 else:
3773 exceptions = []
3775 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3777 if self.op.readd:
3778 self.new_node = self.cfg.GetNodeInfo(node)
3779 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3780 else:
3781 self.new_node = objects.Node(name=node,
3782 primary_ip=primary_ip,
3783 secondary_ip=secondary_ip,
3784 master_candidate=self.master_candidate,
3785 offline=False, drained=False)
3787 def Exec(self, feedback_fn):
3788 """Adds the new node to the cluster.
3791 new_node = self.new_node
3792 node = new_node.name
3794 # for re-adds, reset the offline/drained/master-candidate flags;
3795 # we need to reset here, otherwise offline would prevent RPC calls
3796 # later in the procedure; this also means that if the re-add
3797 # fails, we are left with a non-offlined, broken node
3798 if self.op.readd:
3799 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3800 self.LogInfo("Readding a node, the offline/drained flags were reset")
3801 # if we demote the node, we do cleanup later in the procedure
3802 new_node.master_candidate = self.master_candidate
3803 if self.changed_primary_ip:
3804 new_node.primary_ip = self.op.primary_ip
3806 # notify the user about any possible mc promotion
3807 if new_node.master_candidate:
3808 self.LogInfo("Node will be a master candidate")
3810 # check connectivity
3811 result = self.rpc.call_version([node])[node]
3812 result.Raise("Can't get version information from node %s" % node)
3813 if constants.PROTOCOL_VERSION == result.payload:
3814 logging.info("Communication to node %s fine, sw version %s match",
3815 node, result.payload)
3817 raise errors.OpExecError("Version mismatch master version %s,"
3818 " node version %s" %
3819 (constants.PROTOCOL_VERSION, result.payload))
3822 if self.cfg.GetClusterInfo().modify_ssh_setup:
3823 logging.info("Copy ssh key to node %s", node)
3824 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3826 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3827 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3828 priv_key, pub_key]
3829 keyarray = []
3830 for i in keyfiles:
3831 keyarray.append(utils.ReadFile(i))
3833 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3834 keyarray[2], keyarray[3], keyarray[4],
3835 keyarray[5])
3836 result.Raise("Cannot transfer ssh keys to the new node")
3838 # Add node to our /etc/hosts, and add key to known_hosts
3839 if self.cfg.GetClusterInfo().modify_etc_hosts:
3840 # FIXME: this should be done via an rpc call to node daemon
3841 utils.AddHostToEtcHosts(new_node.name)
3843 if new_node.secondary_ip != new_node.primary_ip:
3844 result = self.rpc.call_node_has_ip_address(new_node.name,
3845 new_node.secondary_ip)
3846 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3847 prereq=True, ecode=errors.ECODE_ENVIRON)
3848 if not result.payload:
3849 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3850 " you gave (%s). Please fix and re-run this"
3851 " command." % new_node.secondary_ip)
3853 node_verify_list = [self.cfg.GetMasterNode()]
3854 node_verify_param = {
3855 constants.NV_NODELIST: [node],
3856 # TODO: do a node-net-test as well?
3857 }
3859 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3860 self.cfg.GetClusterName())
3861 for verifier in node_verify_list:
3862 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3863 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3864 if nl_payload:
3865 for failed in nl_payload:
3866 feedback_fn("ssh/hostname verification failed"
3867 " (checking from %s): %s" %
3868 (verifier, nl_payload[failed]))
3869 raise errors.OpExecError("ssh/hostname verification failed.")
3871 if self.op.readd:
3872 _RedistributeAncillaryFiles(self)
3873 self.context.ReaddNode(new_node)
3874 # make sure we redistribute the config
3875 self.cfg.Update(new_node, feedback_fn)
3876 # and make sure the new node will not have old files around
3877 if not new_node.master_candidate:
3878 result = self.rpc.call_node_demote_from_mc(new_node.name)
3879 msg = result.fail_msg
3881 self.LogWarning("Node failed to demote itself from master"
3882 " candidate status: %s" % msg)
3884 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3885 self.context.AddNode(new_node, self.proc.GetECId())
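# Illustrative note (not part of the original module): the branch above is
# what distinguishes a plain add from a re-add at the CLI level, assuming
# the standard gnt-node wiring:
#
#   gnt-node add node2.example.com           # new node: AddNode + full setup
#   gnt-node add --readd node2.example.com   # known node: flags reset, ReaddNode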
3888 class LUSetNodeParams(LogicalUnit):
3889 """Modifies the parameters of a node.
3892 HPATH = "node-modify"
3893 HTYPE = constants.HTYPE_NODE
3894 _OP_PARAMS = [
3895   _PNodeName,
3896   ("master_candidate", None, _TMaybeBool),
3897   ("offline", None, _TMaybeBool),
3898   ("drained", None, _TMaybeBool),
3899   ("auto_promote", False, _TBool),
3900   ]
3901 REQ_BGL = False
3904 def CheckArguments(self):
3905 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3906 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3907 if all_mods.count(None) == 3:
3908   raise errors.OpPrereqError("Please pass at least one modification",
3909                              errors.ECODE_INVAL)
3910 if all_mods.count(True) > 1:
3911   raise errors.OpPrereqError("Can't set the node into more than one"
3912                              " state at the same time",
3913                              errors.ECODE_INVAL)
3915 # Boolean value that tells us whether we're offlining or draining the node
3916 self.offline_or_drain = (self.op.offline == True or
3917 self.op.drained == True)
3918 self.deoffline_or_drain = (self.op.offline == False or
3919 self.op.drained == False)
3920 self.might_demote = (self.op.master_candidate == False or
3921 self.offline_or_drain)
3923 self.lock_all = self.op.auto_promote and self.might_demote
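# A sketch of how the flags computed above interact (illustrative only;
# the opcode name follows the usual OpSetNodeParams convention and is an
# assumption, not quoted from opcodes.py):
#
#   op = opcodes.OpSetNodeParams(node_name="node3", drained=True,
#                                auto_promote=True)
#   # offline_or_drain   -> True  (drained == True)
#   # deoffline_or_drain -> False
#   # might_demote       -> True, so with auto_promote we take lock_all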
3926 def ExpandNames(self):
3927 if self.lock_all:
3928   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3929 else:
3930   self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3932 def BuildHooksEnv(self):
3933   """Build hooks env.
3935   This runs on the master node.
3937   """
3938   env = {
3939     "OP_TARGET": self.op.node_name,
3940     "MASTER_CANDIDATE": str(self.op.master_candidate),
3941     "OFFLINE": str(self.op.offline),
3942     "DRAINED": str(self.op.drained),
3943     }
3944   nl = [self.cfg.GetMasterNode(),
3945         self.op.node_name]
3946   return env, nl, nl
3948 def CheckPrereq(self):
3949 """Check prerequisites.
3951 This only checks the instance list against the existing names.
3954 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3956 if (self.op.master_candidate is not None or
3957 self.op.drained is not None or
3958 self.op.offline is not None):
3959 # we can't change the master's node flags
3960 if self.op.node_name == self.cfg.GetMasterNode():
3961 raise errors.OpPrereqError("The master role can be changed"
3962 " only via masterfailover",
3966 if node.master_candidate and self.might_demote and not self.lock_all:
3967 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3968 # check if after removing the current node, we're missing master
3969 # candidates
3970 (mc_remaining, mc_should, _) = \
3971     self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3972 if mc_remaining < mc_should:
3973   raise errors.OpPrereqError("Not enough master candidates, please"
3974                              " pass auto_promote to allow promotion",
3975                              errors.ECODE_STATE)
3977 if (self.op.master_candidate == True and
3978     ((node.offline and not self.op.offline == False) or
3979      (node.drained and not self.op.drained == False))):
3980   raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3981                              " to master_candidate" % node.name,
3982                              errors.ECODE_INVAL)
3984 # If we're being deofflined/drained, we'll MC ourself if needed
3985 if (self.deoffline_or_drain and not self.offline_or_drain and not
3986 self.op.master_candidate == True and not node.master_candidate):
3987 self.op.master_candidate = _DecideSelfPromotion(self)
3988 if self.op.master_candidate:
3989 self.LogInfo("Autopromoting node to master candidate")
3993 def Exec(self, feedback_fn):
3994   """Modifies a node.
3996   """
3997   node = self.node
3999   result = []
4002 if self.op.offline is not None:
4003 node.offline = self.op.offline
4004 result.append(("offline", str(self.op.offline)))
4005 if self.op.offline == True:
4006   if node.master_candidate:
4007     node.master_candidate = False
4009     result.append(("master_candidate", "auto-demotion due to offline"))
4010   if node.drained:
4011     node.drained = False
4012     result.append(("drained", "clear drained status due to offline"))
4014 if self.op.master_candidate is not None:
4015 node.master_candidate = self.op.master_candidate
4017 result.append(("master_candidate", str(self.op.master_candidate)))
4018 if self.op.master_candidate == False:
4019 rrc = self.rpc.call_node_demote_from_mc(node.name)
4020 msg = rrc.fail_msg
4021 if msg:
4022   self.LogWarning("Node failed to demote itself: %s" % msg)
4024 if self.op.drained is not None:
4025 node.drained = self.op.drained
4026 result.append(("drained", str(self.op.drained)))
4027 if self.op.drained == True:
4028 if node.master_candidate:
4029 node.master_candidate = False
4031 result.append(("master_candidate", "auto-demotion due to drain"))
4032 rrc = self.rpc.call_node_demote_from_mc(node.name)
4033 msg = rrc.fail_msg
4034 if msg:
4035   self.LogWarning("Node failed to demote itself: %s" % msg)
4036 if node.offline:
4037   node.offline = False
4038   result.append(("offline", "clear offline status due to drain"))
4040 # we locked all nodes, we adjust the CP before updating this node
4041 if self.lock_all:
4042   _AdjustCandidatePool(self, [node.name])
4044 # this will trigger configuration file update, if needed
4045 self.cfg.Update(node, feedback_fn)
4047 # this will trigger job queue propagation or cleanup
4049 self.context.ReaddNode(node)
4051 return result
4054 class LUPowercycleNode(NoHooksLU):
4055 """Powercycles a node.
4057 """
4059 _OP_PARAMS = [
4060   _PNodeName,
4061   _PForce,
4062   ]
4063 REQ_BGL = False
4064 def CheckArguments(self):
4065 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4066 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4067 raise errors.OpPrereqError("The node is the master and the force"
4068                            " parameter was not set",
4069                            errors.ECODE_INVAL)
4071 def ExpandNames(self):
4072 """Locking for PowercycleNode.
4074 This is a last-resort option and shouldn't block on other
4075 jobs. Therefore, we grab no locks.
4078 self.needed_locks = {}
4080 def Exec(self, feedback_fn):
4084 result = self.rpc.call_node_powercycle(self.op.node_name,
4085 self.cfg.GetHypervisorType())
4086 result.Raise("Failed to schedule the reboot")
4087 return result.payload
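# Illustrative usage sketch (assumed client-side wiring, not part of this
# module; the opcode name OpPowercycleNode is an assumption): powercycling
# the master requires the force flag, per the CheckArguments test above.
#
#   op = opcodes.OpPowercycleNode(node_name="master.example.com", force=True)
#   # the payload returned by Exec is the node daemon's confirmation that
#   # the reboot was scheduled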
4090 class LUQueryClusterInfo(NoHooksLU):
4091 """Query cluster configuration.
4096 def ExpandNames(self):
4097 self.needed_locks = {}
4099 def Exec(self, feedback_fn):
4100 """Return cluster config.
4103 cluster = self.cfg.GetClusterInfo()
4104 os_hvp = {}
4106 # Filter just for enabled hypervisors
4107 for os_name, hv_dict in cluster.os_hvp.items():
4108 os_hvp[os_name] = {}
4109 for hv_name, hv_params in hv_dict.items():
4110 if hv_name in cluster.enabled_hypervisors:
4111 os_hvp[os_name][hv_name] = hv_params
4113 result = {
4114   "software_version": constants.RELEASE_VERSION,
4115 "protocol_version": constants.PROTOCOL_VERSION,
4116 "config_version": constants.CONFIG_VERSION,
4117 "os_api_version": max(constants.OS_API_VERSIONS),
4118 "export_version": constants.EXPORT_VERSION,
4119 "architecture": (platform.architecture()[0], platform.machine()),
4120 "name": cluster.cluster_name,
4121 "master": cluster.master_node,
4122 "default_hypervisor": cluster.enabled_hypervisors[0],
4123 "enabled_hypervisors": cluster.enabled_hypervisors,
4124 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4125                   for hypervisor_name in cluster.enabled_hypervisors]),
4126 "os_hvp": os_hvp,
4127 "beparams": cluster.beparams,
4128 "osparams": cluster.osparams,
4129 "nicparams": cluster.nicparams,
4130 "candidate_pool_size": cluster.candidate_pool_size,
4131 "master_netdev": cluster.master_netdev,
4132 "volume_group_name": cluster.volume_group_name,
4133 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4134 "file_storage_dir": cluster.file_storage_dir,
4135 "maintain_node_health": cluster.maintain_node_health,
4136 "ctime": cluster.ctime,
4137 "mtime": cluster.mtime,
4138 "uuid": cluster.uuid,
4139 "tags": list(cluster.GetTags()),
4140 "uid_pool": cluster.uid_pool,
4141 "default_iallocator": cluster.default_iallocator,
4142 }
4144 return result
4147 class LUQueryConfigValues(NoHooksLU):
4148 """Return configuration values.
4151 _OP_PARAMS = [_POutputFields]
4152 REQ_BGL = False
4153 _FIELDS_DYNAMIC = utils.FieldSet()
4154 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4155                                 "watcher_pause")
4157 def CheckArguments(self):
4158 _CheckOutputFields(static=self._FIELDS_STATIC,
4159 dynamic=self._FIELDS_DYNAMIC,
4160 selected=self.op.output_fields)
4162 def ExpandNames(self):
4163 self.needed_locks = {}
4165 def Exec(self, feedback_fn):
4166   """Dump a representation of the cluster config to the standard output.
4168   """
4169   values = []
4170   for field in self.op.output_fields:
4171 if field == "cluster_name":
4172 entry = self.cfg.GetClusterName()
4173 elif field == "master_node":
4174 entry = self.cfg.GetMasterNode()
4175 elif field == "drain_flag":
4176 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4177 elif field == "watcher_pause":
4178 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4179 else:
4180   raise errors.ParameterError(field)
4181 values.append(entry)
4182 return values
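# Example of the field -> value mapping implemented above (illustrative
# values only):
#
#   output_fields = ["cluster_name", "drain_flag"]
#   # -> ["cluster.example.com", False]
#
# Fields come back in the order requested; unknown fields raise
# errors.ParameterError.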
4185 class LUActivateInstanceDisks(NoHooksLU):
4186 """Bring up an instance's disks.
4188 """
4189 _OP_PARAMS = [
4190   _PInstanceName,
4191   ("ignore_size", False, _TBool),
4192   ]
4193 REQ_BGL = False
4195 def ExpandNames(self):
4196 self._ExpandAndLockInstance()
4197 self.needed_locks[locking.LEVEL_NODE] = []
4198 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4200 def DeclareLocks(self, level):
4201 if level == locking.LEVEL_NODE:
4202 self._LockInstancesNodes()
4204 def CheckPrereq(self):
4205 """Check prerequisites.
4207 This checks that the instance is in the cluster.
4210 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4211 assert self.instance is not None, \
4212 "Cannot retrieve locked instance %s" % self.op.instance_name
4213 _CheckNodeOnline(self, self.instance.primary_node)
4215 def Exec(self, feedback_fn):
4216 """Activate the disks.
4219 disks_ok, disks_info = \
4220 _AssembleInstanceDisks(self, self.instance,
4221 ignore_size=self.op.ignore_size)
4222 if not disks_ok:
4223   raise errors.OpExecError("Cannot activate block devices")
4225 return disks_info
4228 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4229                            ignore_size=False):
4230 """Prepare the block devices for an instance.
4232 This sets up the block devices on all nodes.
4234 @type lu: L{LogicalUnit}
4235 @param lu: the logical unit on whose behalf we execute
4236 @type instance: L{objects.Instance}
4237 @param instance: the instance for whose disks we assemble
4238 @type disks: list of L{objects.Disk} or None
4239 @param disks: which disks to assemble (or all, if None)
4240 @type ignore_secondaries: boolean
4241 @param ignore_secondaries: if true, errors on secondary nodes
4242 won't result in an error return from the function
4243 @type ignore_size: boolean
4244 @param ignore_size: if true, the current known size of the disk
4245 will not be used during the disk activation, useful for cases
4246 when the size is wrong
4247 @return: False if the operation failed, otherwise a list of
4248 (host, instance_visible_name, node_visible_name)
4249 with the mapping from node devices to instance devices
4251 device_info = []
4252 disks_ok = True
4254 iname = instance.name
4255 disks = _ExpandCheckDisks(instance, disks)
4257 # With the two passes mechanism we try to reduce the window of
4258 # opportunity for the race condition of switching DRBD to primary
4259 # before handshaking occurred, but we do not eliminate it
4261 # The proper fix would be to wait (with some limits) until the
4262 # connection has been made and drbd transitions from WFConnection
4263 # into any other network-connected state (Connected, SyncTarget,
4264 # SyncSource, etc.)
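# A bounded-wait sketch of the "proper fix" described above (illustrative
# only: call_drbd_wait_connect is a hypothetical RPC, not one this module
# defines; the WFConnection name matches the DRBD state referred to):
#
#   def _WaitForDrbdHandshake(lu, nodes, disks, timeout=60):
#     deadline = time.time() + timeout
#     while time.time() < deadline:
#       result = lu.rpc.call_drbd_wait_connect(nodes, disks)  # hypothetical
#       if all(not nres.fail_msg and nres.payload != "WFConnection"
#              for nres in result.values()):
#         return True   # handshake done on all nodes
#       time.sleep(1)
#     return False      # caller falls back to the two-pass behaviour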
4266 # 1st pass, assemble on all nodes in secondary mode
4267 for inst_disk in disks:
4268 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4269   if ignore_size:
4270     node_disk = node_disk.Copy()
4271     node_disk.UnsetSize()
4272   lu.cfg.SetDiskID(node_disk, node)
4273   result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4274   msg = result.fail_msg
4275   if msg:
4276     lu.proc.LogWarning("Could not prepare block device %s on node %s"
4277                        " (is_primary=False, pass=1): %s",
4278                        inst_disk.iv_name, node, msg)
4279     if not ignore_secondaries:
4280       disks_ok = False
4282 # FIXME: race condition on drbd migration to primary
4284 # 2nd pass, do only the primary node
4285 for inst_disk in disks:
4286   dev_path = None
4288   for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4289     if node != instance.primary_node:
4290       continue
4291     if ignore_size:
4292       node_disk = node_disk.Copy()
4293       node_disk.UnsetSize()
4294     lu.cfg.SetDiskID(node_disk, node)
4295     result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4296     msg = result.fail_msg
4297     if msg:
4298       lu.proc.LogWarning("Could not prepare block device %s on node %s"
4299                          " (is_primary=True, pass=2): %s",
4300                          inst_disk.iv_name, node, msg)
4301       disks_ok = False
4302     else:
4303       dev_path = result.payload
4305   device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4307 # leave the disks configured for the primary node
4308 # this is a workaround that would be fixed better by
4309 # improving the logical/physical id handling
4310 for disk in disks:
4311   lu.cfg.SetDiskID(disk, instance.primary_node)
4313 return disks_ok, device_info
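# Shape of the return value, as built above (illustrative example):
#
#   disks_ok    -> False as soon as any non-ignored assembly failed
#   device_info -> [(primary_node, iv_name, dev_path), ...], e.g.
#                  [("node1.example.com", "disk/0", "/dev/drbd0")]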
4316 def _StartInstanceDisks(lu, instance, force):
4317 """Start the disks of an instance.
4320 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4321                                      ignore_secondaries=force)
4322 if not disks_ok:
4323   _ShutdownInstanceDisks(lu, instance)
4324   if force is not None and not force:
4325     lu.proc.LogWarning("", hint="If the message above refers to a"
4326                        " secondary node,"
4327                        " you can retry the operation using '--force'.")
4328   raise errors.OpExecError("Disk consistency error")
4331 class LUDeactivateInstanceDisks(NoHooksLU):
4332 """Shutdown an instance's disks.
4334 """
4335 _OP_PARAMS = [
4336   _PInstanceName,
4337   ]
4338 REQ_BGL = False
4340 def ExpandNames(self):
4341 self._ExpandAndLockInstance()
4342 self.needed_locks[locking.LEVEL_NODE] = []
4343 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4345 def DeclareLocks(self, level):
4346 if level == locking.LEVEL_NODE:
4347 self._LockInstancesNodes()
4349 def CheckPrereq(self):
4350 """Check prerequisites.
4352 This checks that the instance is in the cluster.
4355 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4356 assert self.instance is not None, \
4357 "Cannot retrieve locked instance %s" % self.op.instance_name
4359 def Exec(self, feedback_fn):
4360 """Deactivate the disks
4363 instance = self.instance
4364 _SafeShutdownInstanceDisks(self, instance)
4367 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4368 """Shutdown block devices of an instance.
4370 This function checks if an instance is running, before calling
4371 _ShutdownInstanceDisks.
4374 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4375 _ShutdownInstanceDisks(lu, instance, disks=disks)
4378 def _ExpandCheckDisks(instance, disks):
4379 """Return the instance disks selected by the disks list
4381 @type disks: list of L{objects.Disk} or None
4382 @param disks: selected disks
4383 @rtype: list of L{objects.Disk}
4384 @return: selected instance disks to act on
4386 """
4387 if disks is None:
4388   return instance.disks
4389 else:
4390   if not set(disks).issubset(instance.disks):
4391     raise errors.ProgrammerError("Can only act on disks belonging to the"
4392                                  " requested instance")
4393   return disks
4396 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4397 """Shutdown block devices of an instance.
4399 This does the shutdown on all nodes of the instance.
4401 If ignore_primary is false, errors on the primary node are
4402 ignored.
4404 """
4405 all_result = True
4406 disks = _ExpandCheckDisks(instance, disks)
4408 for disk in disks:
4409   for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4410     lu.cfg.SetDiskID(top_disk, node)
4411     result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4412     msg = result.fail_msg
4413     if msg:
4414       lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4415                     disk.iv_name, node, msg)
4416       if not ignore_primary or node != instance.primary_node:
4417         all_result = False
4419 return all_result
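# Hedged usage sketch (names assumed from a surrounding LU): callers treat
# a False result as "some device is still up", e.g.:
#
#   if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
#     raise errors.OpExecError("Can't shut down the instance's disks.")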
4421 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4422 """Checks if a node has enough free memory.
4424 This function checks if a given node has the needed amount of free
4425 memory. In case the node has less memory or we cannot get the
4426 information from the node, this function raises an OpPrereqError
4427 exception.
4429 @type lu: C{LogicalUnit}
4430 @param lu: a logical unit from which we get configuration data
4431 @type node: C{str}
4432 @param node: the node to check
4433 @type reason: C{str}
4434 @param reason: string to use in the error message
4435 @type requested: C{int}
4436 @param requested: the amount of memory in MiB to check for
4437 @type hypervisor_name: C{str}
4438 @param hypervisor_name: the hypervisor to ask for memory stats
4439 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4440 we cannot check the node
4443 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4444 nodeinfo[node].Raise("Can't get data from node %s" % node,
4445 prereq=True, ecode=errors.ECODE_ENVIRON)
4446 free_mem = nodeinfo[node].payload.get('memory_free', None)
4447 if not isinstance(free_mem, int):
4448 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4449 " was '%s'" % (node, free_mem),
4450 errors.ECODE_ENVIRON)
4451 if requested > free_mem:
4452   raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4453                              " needed %s MiB, available %s MiB" %
4454                              (node, reason, requested, free_mem),
4455                              errors.ECODE_NORES)
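# Hedged usage sketch (values illustrative): a caller checking that a
# failover target can hold the instance's memory would do something like:
#
#   _CheckNodeFreeMemory(self, target_node,
#                        "failing over instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)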
4458 def _CheckNodesFreeDisk(lu, nodenames, requested):
4459 """Checks if nodes have enough free disk space in the default VG.
4461 This function checks if all given nodes have the needed amount of
4462 free disk. In case any node has less disk or we cannot get the
4463 information from the node, this function raises an OpPrereqError
4464 exception.
4466 @type lu: C{LogicalUnit}
4467 @param lu: a logical unit from which we get configuration data
4468 @type nodenames: C{list}
4469 @param nodenames: the list of node names to check
4470 @type requested: C{int}
4471 @param requested: the amount of disk in MiB to check for
4472 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4473 we cannot check the node
4476 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4477 lu.cfg.GetHypervisorType())
4478 for node in nodenames:
4479 info = nodeinfo[node]
4480 info.Raise("Cannot get current information from node %s" % node,
4481 prereq=True, ecode=errors.ECODE_ENVIRON)
4482 vg_free = info.payload.get("vg_free", None)
4483 if not isinstance(vg_free, int):
4484 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4485 " result was '%s'" % (node, vg_free),
4486 errors.ECODE_ENVIRON)
4487 if requested > vg_free:
4488   raise errors.OpPrereqError("Not enough disk space on target node %s:"
4489                              " required %d MiB, available %d MiB" %
4490                              (node, requested, vg_free),
4491                              errors.ECODE_NORES)
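# Hedged usage sketch (size illustrative): checking that every node in a
# list can hold 2048 MiB of new volumes in the default VG:
#
#   _CheckNodesFreeDisk(self, [pnode, snode], 2048)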
4494 class LUStartupInstance(LogicalUnit):
4495 """Starts an instance.
4498 HPATH = "instance-start"
4499 HTYPE = constants.HTYPE_INSTANCE
4500 _OP_PARAMS = [
4501   _PInstanceName,
4502   _PForce,
4503   ("hvparams", _EmptyDict, _TDict),
4504   ("beparams", _EmptyDict, _TDict),
4505   ]
4506 REQ_BGL = False
4508 def CheckArguments(self):
4510 if self.op.beparams:
4511 # fill the beparams dict
4512 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4514 def ExpandNames(self):
4515 self._ExpandAndLockInstance()
4517 def BuildHooksEnv(self):
4518   """Build hooks env.
4520   This runs on master, primary and secondary nodes of the instance.
4522   """
4523   env = {
4524     "FORCE": self.op.force,
4525     }
4526   env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4527   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4528   return env, nl, nl
4530 def CheckPrereq(self):
4531 """Check prerequisites.
4533 This checks that the instance is in the cluster.
4536 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4537 assert self.instance is not None, \
4538 "Cannot retrieve locked instance %s" % self.op.instance_name
4541 if self.op.hvparams:
4542 # check hypervisor parameter syntax (locally)
4543 cluster = self.cfg.GetClusterInfo()
4544 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4545 filled_hvp = cluster.FillHV(instance)
4546 filled_hvp.update(self.op.hvparams)
4547 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4548 hv_type.CheckParameterSyntax(filled_hvp)
4549 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4551 _CheckNodeOnline(self, instance.primary_node)
4553 bep = self.cfg.GetClusterInfo().FillBE(instance)
4554 # check bridges existence
4555 _CheckInstanceBridgesExist(self, instance)
4557 remote_info = self.rpc.call_instance_info(instance.primary_node,
4558                                           instance.name,
4559                                           instance.hypervisor)
4560 remote_info.Raise("Error checking node %s" % instance.primary_node,
4561 prereq=True, ecode=errors.ECODE_ENVIRON)
4562 if not remote_info.payload: # not running already
4563 _CheckNodeFreeMemory(self, instance.primary_node,
4564 "starting instance %s" % instance.name,
4565 bep[constants.BE_MEMORY], instance.hypervisor)
4567 def Exec(self, feedback_fn):
4568 """Start the instance.
4571 instance = self.instance
4572 force = self.op.force
4574 self.cfg.MarkInstanceUp(instance.name)
4576 node_current = instance.primary_node
4578 _StartInstanceDisks(self, instance, force)
4580 result = self.rpc.call_instance_start(node_current, instance,
4581 self.op.hvparams, self.op.beparams)
4582 msg = result.fail_msg
4583 if msg:
4584   _ShutdownInstanceDisks(self, instance)
4585   raise errors.OpExecError("Could not start instance: %s" % msg)
4588 class LURebootInstance(LogicalUnit):
4589 """Reboot an instance.
4592 HPATH = "instance-reboot"
4593 HTYPE = constants.HTYPE_INSTANCE
4594 _OP_PARAMS = [
4595   _PInstanceName,
4596   ("ignore_secondaries", False, _TBool),
4597   ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4598   _PShutdownTimeout,
4599   ]
4600 REQ_BGL = False
4602 def ExpandNames(self):
4603 self._ExpandAndLockInstance()
4605 def BuildHooksEnv(self):
4606   """Build hooks env.
4608   This runs on master, primary and secondary nodes of the instance.
4610   """
4611   env = {
4612     "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4613     "REBOOT_TYPE": self.op.reboot_type,
4614     "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4615     }
4616   env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4617   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4618   return env, nl, nl
4620 def CheckPrereq(self):
4621 """Check prerequisites.
4623 This checks that the instance is in the cluster.
4626 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4627 assert self.instance is not None, \
4628 "Cannot retrieve locked instance %s" % self.op.instance_name
4630 _CheckNodeOnline(self, instance.primary_node)
4632 # check bridges existence
4633 _CheckInstanceBridgesExist(self, instance)
4635 def Exec(self, feedback_fn):
4636 """Reboot the instance.
4639 instance = self.instance
4640 ignore_secondaries = self.op.ignore_secondaries
4641 reboot_type = self.op.reboot_type
4643 node_current = instance.primary_node
4645 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4646 constants.INSTANCE_REBOOT_HARD]:
4647 for disk in instance.disks:
4648 self.cfg.SetDiskID(disk, node_current)
4649   result = self.rpc.call_instance_reboot(node_current, instance,
4650                                          reboot_type,
4651                                          self.op.shutdown_timeout)
4652   result.Raise("Could not reboot instance")
4653 else:
4654   result = self.rpc.call_instance_shutdown(node_current, instance,
4655                                            self.op.shutdown_timeout)
4656   result.Raise("Could not shutdown instance for full reboot")
4657 _ShutdownInstanceDisks(self, instance)
4658 _StartInstanceDisks(self, instance, ignore_secondaries)
4659 result = self.rpc.call_instance_start(node_current, instance, None, None)
4660   msg = result.fail_msg
4661   if msg:
4662     _ShutdownInstanceDisks(self, instance)
4663     raise errors.OpExecError("Could not start instance for"
4664 " full reboot: %s" % msg)
4666 self.cfg.MarkInstanceUp(instance.name)
4669 class LUShutdownInstance(LogicalUnit):
4670 """Shutdown an instance.
4673 HPATH = "instance-stop"
4674 HTYPE = constants.HTYPE_INSTANCE
4675 _OP_PARAMS = [
4676   _PInstanceName,
4677   ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4678   ]
4679 REQ_BGL = False
4681 def ExpandNames(self):
4682 self._ExpandAndLockInstance()
4684 def BuildHooksEnv(self):
4685   """Build hooks env.
4687   This runs on master, primary and secondary nodes of the instance.
4689   """
4690   env = _BuildInstanceHookEnvByObject(self, self.instance)
4691   env["TIMEOUT"] = self.op.timeout
4692   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4693   return env, nl, nl
4695 def CheckPrereq(self):
4696 """Check prerequisites.
4698 This checks that the instance is in the cluster.
4701 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4702 assert self.instance is not None, \
4703 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, self.instance.primary_node)
4706 def Exec(self, feedback_fn):
4707 """Shutdown the instance.
4710 instance = self.instance
4711 node_current = instance.primary_node
4712 timeout = self.op.timeout
4713 self.cfg.MarkInstanceDown(instance.name)
4714 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4715 msg = result.fail_msg
4716 if msg:
4717   self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4719 _ShutdownInstanceDisks(self, instance)
4722 class LUReinstallInstance(LogicalUnit):
4723 """Reinstall an instance.
4726 HPATH = "instance-reinstall"
4727 HTYPE = constants.HTYPE_INSTANCE
4728 _OP_PARAMS = [
4729   _PInstanceName,
4730   ("os_type", None, _TMaybeString),
4731   ("force_variant", False, _TBool),
4732   ]
4733 REQ_BGL = False
4735 def ExpandNames(self):
4736 self._ExpandAndLockInstance()
4738 def BuildHooksEnv(self):
4739   """Build hooks env.
4741   This runs on master, primary and secondary nodes of the instance.
4743   """
4744   env = _BuildInstanceHookEnvByObject(self, self.instance)
4745   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4746   return env, nl, nl
4748 def CheckPrereq(self):
4749 """Check prerequisites.
4751 This checks that the instance is in the cluster and is not running.
4754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4755 assert instance is not None, \
4756 "Cannot retrieve locked instance %s" % self.op.instance_name
4757 _CheckNodeOnline(self, instance.primary_node)
4759 if instance.disk_template == constants.DT_DISKLESS:
4760 raise errors.OpPrereqError("Instance '%s' has no disks" %
4761 self.op.instance_name,
4763 _CheckInstanceDown(self, instance, "cannot reinstall")
4765 if self.op.os_type is not None:
4767 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4768 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4770 self.instance = instance
4772 def Exec(self, feedback_fn):
4773 """Reinstall the instance.
4776 inst = self.instance
4778 if self.op.os_type is not None:
4779 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4780 inst.os = self.op.os_type
4781 self.cfg.Update(inst, feedback_fn)
4783 _StartInstanceDisks(self, inst, None)
4784 try:
4785   feedback_fn("Running the instance OS create scripts...")
4786   # FIXME: pass debug option from opcode to backend
4787   result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4788                                          self.op.debug_level)
4789   result.Raise("Could not install OS for instance %s on node %s" %
4790                (inst.name, inst.primary_node))
4791 finally:
4792   _ShutdownInstanceDisks(self, inst)
4795 class LURecreateInstanceDisks(LogicalUnit):
4796 """Recreate an instance's missing disks.
4799 HPATH = "instance-recreate-disks"
4800 HTYPE = constants.HTYPE_INSTANCE
4801 _OP_PARAMS = [
4802   _PInstanceName,
4803   ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4804   ]
4805 REQ_BGL = False
4807 def ExpandNames(self):
4808 self._ExpandAndLockInstance()
4810 def BuildHooksEnv(self):
4811   """Build hooks env.
4813   This runs on master, primary and secondary nodes of the instance.
4815   """
4816   env = _BuildInstanceHookEnvByObject(self, self.instance)
4817   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4818   return env, nl, nl
4820 def CheckPrereq(self):
4821 """Check prerequisites.
4823 This checks that the instance is in the cluster and is not running.
4826 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4827 assert instance is not None, \
4828 "Cannot retrieve locked instance %s" % self.op.instance_name
4829 _CheckNodeOnline(self, instance.primary_node)
4831 if instance.disk_template == constants.DT_DISKLESS:
4832 raise errors.OpPrereqError("Instance '%s' has no disks" %
4833 self.op.instance_name, errors.ECODE_INVAL)
4834 _CheckInstanceDown(self, instance, "cannot recreate disks")
4836 if not self.op.disks:
4837 self.op.disks = range(len(instance.disks))
4839 for idx in self.op.disks:
4840 if idx >= len(instance.disks):
4841   raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4842                              errors.ECODE_INVAL)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847   """Recreate the disks.
4849   """
4850   to_skip = []
4851   for idx, _ in enumerate(self.instance.disks):
4852     if idx not in self.op.disks: # disk idx has not been passed in
4853       to_skip.append(idx)
4854       continue
4856   _CreateDisks(self, self.instance, to_skip=to_skip)
4859 class LURenameInstance(LogicalUnit):
4860 """Rename an instance.
4863 HPATH = "instance-rename"
4864 HTYPE = constants.HTYPE_INSTANCE
4865 _OP_PARAMS = [
4866   _PInstanceName,
4867   ("new_name", _NoDefault, _TNonEmptyString),
4868   ("ignore_ip", False, _TBool),
4869   ("check_name", True, _TBool),
4870   ]
4872 def BuildHooksEnv(self):
4873   """Build hooks env.
4875   This runs on master, primary and secondary nodes of the instance.
4877   """
4878   env = _BuildInstanceHookEnvByObject(self, self.instance)
4879   env["INSTANCE_NEW_NAME"] = self.op.new_name
4880   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4881   return env, nl, nl
4883 def CheckPrereq(self):
4884 """Check prerequisites.
4886 This checks that the instance is in the cluster and is not running.
4889 self.op.instance_name = _ExpandInstanceName(self.cfg,
4890 self.op.instance_name)
4891 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4892 assert instance is not None
4893 _CheckNodeOnline(self, instance.primary_node)
4894 _CheckInstanceDown(self, instance, "cannot rename")
4895 self.instance = instance
4897 # new name verification
4898 if self.op.check_name:
4899 name_info = netutils.GetHostInfo(self.op.new_name)
4900 self.op.new_name = name_info.name
4902 new_name = self.op.new_name
4904 instance_list = self.cfg.GetInstanceList()
4905 if new_name in instance_list:
4906 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4907 new_name, errors.ECODE_EXISTS)
4909 if not self.op.ignore_ip:
4910 if netutils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4911 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4912 (name_info.ip, new_name),
4913 errors.ECODE_NOTUNIQUE)
4915 def Exec(self, feedback_fn):
4916   """Rename the instance.
4918   """
4919 inst = self.instance
4920 old_name = inst.name
4922 if inst.disk_template == constants.DT_FILE:
4923 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4925 self.cfg.RenameInstance(inst.name, self.op.new_name)
4926 # Change the instance lock. This is definitely safe while we hold the BGL
4927 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4928 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4930 # re-read the instance from the configuration after rename
4931 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4933 if inst.disk_template == constants.DT_FILE:
4934 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4935 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4936 old_file_storage_dir,
4937 new_file_storage_dir)
4938 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4939 " (but the instance has been renamed in Ganeti)" %
4940 (inst.primary_node, old_file_storage_dir,
4941 new_file_storage_dir))
4943 _StartInstanceDisks(self, inst, None)
4944 try:
4945   result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4946                                              old_name, self.op.debug_level)
4947   msg = result.fail_msg
4948   if msg:
4949     msg = ("Could not run OS rename script for instance %s on node %s"
4950            " (but the instance has been renamed in Ganeti): %s" %
4951            (inst.name, inst.primary_node, msg))
4952     self.proc.LogWarning(msg)
4953 finally:
4954   _ShutdownInstanceDisks(self, inst)
4956 return inst.name
4957 class LURemoveInstance(LogicalUnit):
4958 """Remove an instance.
4961 HPATH = "instance-remove"
4962 HTYPE = constants.HTYPE_INSTANCE
4963 _OP_PARAMS = [
4964   _PInstanceName,
4965   ("ignore_failures", False, _TBool),
4966   _PShutdownTimeout,
4967   ]
4968 REQ_BGL = False
4970 def ExpandNames(self):
4971 self._ExpandAndLockInstance()
4972 self.needed_locks[locking.LEVEL_NODE] = []
4973 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4975 def DeclareLocks(self, level):
4976 if level == locking.LEVEL_NODE:
4977 self._LockInstancesNodes()
4979 def BuildHooksEnv(self):
4982 This runs on master, primary and secondary nodes of the instance.
4985 env = _BuildInstanceHookEnvByObject(self, self.instance)
4986 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4987 nl = [self.cfg.GetMasterNode()]
4988 nl_post = list(self.instance.all_nodes) + nl
4989 return env, nl, nl_post
4991 def CheckPrereq(self):
4992 """Check prerequisites.
4994 This checks that the instance is in the cluster.
4997 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4998 assert self.instance is not None, \
4999 "Cannot retrieve locked instance %s" % self.op.instance_name
5001 def Exec(self, feedback_fn):
5002 """Remove the instance.
5005 instance = self.instance
5006 logging.info("Shutting down instance %s on node %s",
5007 instance.name, instance.primary_node)
5009 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5010 self.op.shutdown_timeout)
5011 msg = result.fail_msg
5012 if msg:
5013   if self.op.ignore_failures:
5014     feedback_fn("Warning: can't shutdown instance: %s" % msg)
5015   else:
5016     raise errors.OpExecError("Could not shutdown instance %s on"
5017                              " node %s: %s" %
5018                              (instance.name, instance.primary_node, msg))
5020 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5023 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5024 """Utility function to remove an instance.
5027 logging.info("Removing block devices for instance %s", instance.name)
5029 if not _RemoveDisks(lu, instance):
5030 if not ignore_failures:
5031 raise errors.OpExecError("Can't remove instance's disks")
5032 feedback_fn("Warning: can't remove instance's disks")
5034 logging.info("Removing instance %s out of cluster config", instance.name)
5036 lu.cfg.RemoveInstance(instance.name)
5038 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5039 "Instance lock removal conflict"
5041 # Remove lock for the instance
5042 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5045 class LUQueryInstances(NoHooksLU):
5046 """Logical unit for querying instances.
5049 # pylint: disable-msg=W0142
5050 _OP_PARAMS = [
5051   ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5052   ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5053   ("use_locking", False, _TBool),
5054   ]
5055 REQ_BGL = False
5056 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5057 "serial_no", "ctime", "mtime", "uuid"]
5058 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5059                                   "admin_state",
5060                                   "disk_template", "ip", "mac", "bridge",
5061                                   "nic_mode", "nic_link",
5062                                   "sda_size", "sdb_size", "vcpus", "tags",
5063                                   "network_port", "beparams",
5064                                   r"(disk)\.(size)/([0-9]+)",
5065                                   r"(disk)\.(sizes)", "disk_usage",
5066                                   r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5067                                   r"(nic)\.(bridge)/([0-9]+)",
5068                                   r"(nic)\.(macs|ips|modes|links|bridges)",
5069                                   r"(disk|nic)\.(count)",
5070                                   "hvparams",
5071                                   ] + _SIMPLE_FIELDS +
5072                                  ["hv/%s" % name
5073                                   for name in constants.HVS_PARAMETERS
5074                                   if name not in constants.HVC_GLOBALS] +
5075                                  ["be/%s" % name
5076                                   for name in constants.BES_PARAMETERS])
5077 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5078                                  "oper_ram",
5079                                  "oper_vcpus",
5080                                  "status")
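# Examples of field names accepted by the static FieldSet above
# (illustrative): plain names ("name", "os"), indexed items matched by the
# regexes ("disk.size/0", "nic.mac/1"), aggregates ("disk.sizes",
# "nic.count"), and per-parameter entries built from the "hv/%s" and
# "be/%s" expansions, e.g. "be/memory".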
5083 def CheckArguments(self):
5084 _CheckOutputFields(static=self._FIELDS_STATIC,
5085 dynamic=self._FIELDS_DYNAMIC,
5086 selected=self.op.output_fields)
5088 def ExpandNames(self):
5089 self.needed_locks = {}
5090 self.share_locks[locking.LEVEL_INSTANCE] = 1
5091 self.share_locks[locking.LEVEL_NODE] = 1
5093 if self.op.names:
5094   self.wanted = _GetWantedInstances(self, self.op.names)
5095 else:
5096   self.wanted = locking.ALL_SET
5098 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5099 self.do_locking = self.do_node_query and self.op.use_locking
5100 if self.do_locking:
5101   self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5102   self.needed_locks[locking.LEVEL_NODE] = []
5103   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5105 def DeclareLocks(self, level):
5106 if level == locking.LEVEL_NODE and self.do_locking:
5107 self._LockInstancesNodes()
5109 def Exec(self, feedback_fn):
5110 """Computes the list of nodes and their attributes.
5113 # pylint: disable-msg=R0912
5114 # way too many branches here
5115 all_info = self.cfg.GetAllInstancesInfo()
5116 if self.wanted == locking.ALL_SET:
5117   # caller didn't specify instance names, so ordering is not important
5118   if self.do_locking:
5119     instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5120   else:
5121     instance_names = all_info.keys()
5122   instance_names = utils.NiceSort(instance_names)
5123 else:
5124   # caller did specify names, so we must keep the ordering
5125   if self.do_locking:
5126     tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5127   else:
5128     tgt_set = all_info.keys()
5129   missing = set(self.wanted).difference(tgt_set)
5130   if missing:
5131     raise errors.OpExecError("Some instances were removed before"
5132                              " retrieving their data: %s" % missing)
5133   instance_names = self.wanted
5135 instance_list = [all_info[iname] for iname in instance_names]
5137 # begin data gathering
5139 nodes = frozenset([inst.primary_node for inst in instance_list])
5140 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5142 bad_nodes = []
5143 off_nodes = []
5144 if self.do_node_query:
5145   live_data = {}
5146   node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5147   for name in nodes:
5148     result = node_data[name]
5149     if result.offline:
5150       # offline nodes will be in both lists
5151       off_nodes.append(name)
5152     if result.fail_msg:
5153       bad_nodes.append(name)
5154     else:
5155       if result.payload:
5156         live_data.update(result.payload)
5157       # else no instance is alive
5158 else:
5159   live_data = dict([(name, {}) for name in instance_names])
5161 # end data gathering
5163 HVPREFIX = "hv/"
5164 BEPREFIX = "be/"
5165 output = []
5166 cluster = self.cfg.GetClusterInfo()
5167 for instance in instance_list:
5168   iout = []
5169   i_hv = cluster.FillHV(instance, skip_globals=True)
5170 i_be = cluster.FillBE(instance)
5171 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5172 for field in self.op.output_fields:
5173 st_match = self._FIELDS_STATIC.Matches(field)
5174 if field in self._SIMPLE_FIELDS:
5175 val = getattr(instance, field)
5176 elif field == "pnode":
5177 val = instance.primary_node
5178 elif field == "snodes":
5179 val = list(instance.secondary_nodes)
5180 elif field == "admin_state":
5181 val = instance.admin_up
5182 elif field == "oper_state":
5183 if instance.primary_node in bad_nodes:
5184   val = None
5185 else:
5186   val = bool(live_data.get(instance.name))
5187 elif field == "status":
5188 if instance.primary_node in off_nodes:
5189   val = "ERROR_nodeoffline"
5190 elif instance.primary_node in bad_nodes:
5191   val = "ERROR_nodedown"
5192 else:
5193   running = bool(live_data.get(instance.name))
5194   if running:
5195     if instance.admin_up:
5196       val = "running"
5197     else:
5198       val = "ERROR_up"
5199   else:
5200     if instance.admin_up:
5201       val = "ERROR_down"
5202     else:
5203       val = "ADMIN_down"
5204 elif field == "oper_ram":
5205 if instance.primary_node in bad_nodes:
5206   val = None
5207 elif instance.name in live_data:
5208   val = live_data[instance.name].get("memory", "?")
5209 else:
5210   val = "-"
5211 elif field == "oper_vcpus":
5212 if instance.primary_node in bad_nodes:
5213   val = None
5214 elif instance.name in live_data:
5215   val = live_data[instance.name].get("vcpus", "?")
5216 else:
5217   val = "-"
5218 elif field == "vcpus":
5219 val = i_be[constants.BE_VCPUS]
5220 elif field == "disk_template":
5221 val = instance.disk_template
5222 elif field == "ip":
5223 if instance.nics:
5224   val = instance.nics[0].ip
5225 else:
5226   val = None
5227 elif field == "nic_mode":
5228 if instance.nics:
5229   val = i_nicp[0][constants.NIC_MODE]
5230 else:
5231   val = None
5232 elif field == "nic_link":
5233 if instance.nics:
5234   val = i_nicp[0][constants.NIC_LINK]
5235 else:
5236   val = None
5237 elif field == "bridge":
5238 if (instance.nics and
5239     i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5240   val = i_nicp[0][constants.NIC_LINK]
5241 else:
5242   val = None
5243 elif field == "mac":
5244 if instance.nics:
5245   val = instance.nics[0].mac
5246 else:
5247   val = None
5248 elif field == "sda_size" or field == "sdb_size":
5249 idx = ord(field[2]) - ord('a')
5250 try:
5251   val = instance.FindDisk(idx).size
5252 except errors.OpPrereqError:
5253   val = None
5254 elif field == "disk_usage": # total disk usage per node
5255 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5256 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5257 elif field == "tags":
5258 val = list(instance.GetTags())
5259 elif field == "hvparams":
5260 val = i_hv
5261 elif (field.startswith(HVPREFIX) and
5262 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5263 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5264 val = i_hv.get(field[len(HVPREFIX):], None)
5265 elif field == "beparams":
5266 val = i_be
5267 elif (field.startswith(BEPREFIX) and
5268 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5269 val = i_be.get(field[len(BEPREFIX):], None)
5270 elif st_match and st_match.groups():
5271 # matches a variable list
5272 st_groups = st_match.groups()
5273 if st_groups and st_groups[0] == "disk":
5274 if st_groups[1] == "count":
5275 val = len(instance.disks)
5276 elif st_groups[1] == "sizes":
5277 val = [disk.size for disk in instance.disks]
5278 elif st_groups[1] == "size":
5279 try:
5280   val = instance.FindDisk(st_groups[2]).size
5281 except errors.OpPrereqError:
5282   val = None
5283 else:
5284 assert False, "Unhandled disk parameter"
5285 elif st_groups[0] == "nic":
5286 if st_groups[1] == "count":
5287 val = len(instance.nics)
5288 elif st_groups[1] == "macs":
5289 val = [nic.mac for nic in instance.nics]
5290 elif st_groups[1] == "ips":
5291 val = [nic.ip for nic in instance.nics]
5292 elif st_groups[1] == "modes":
5293 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5294 elif st_groups[1] == "links":
5295 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5296 elif st_groups[1] == "bridges":
5297 val = []
5298 for nicp in i_nicp:
5299   if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5300     val.append(nicp[constants.NIC_LINK])
5301   else:
5302     val.append(None)
5303 else:
5304 # index-based item
5305 nic_idx = int(st_groups[2])
5306 if nic_idx >= len(instance.nics):
5307   val = None
5308 else:
5309 if st_groups[1] == "mac":
5310 val = instance.nics[nic_idx].mac
5311 elif st_groups[1] == "ip":
5312 val = instance.nics[nic_idx].ip
5313 elif st_groups[1] == "mode":
5314 val = i_nicp[nic_idx][constants.NIC_MODE]
5315 elif st_groups[1] == "link":
5316 val = i_nicp[nic_idx][constants.NIC_LINK]
5317 elif st_groups[1] == "bridge":
5318 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5319 if nic_mode == constants.NIC_MODE_BRIDGED:
5320 val = i_nicp[nic_idx][constants.NIC_LINK]
5321 else:
5322   val = None
5323 else:
5324 assert False, "Unhandled NIC parameter"
5325 else:
5326 assert False, ("Declared but unhandled variable parameter '%s'" %
5327                field)
5328 else:
5329 assert False, "Declared but unhandled parameter '%s'" % field
5331 iout.append(val)
5332 output.append(iout)
5334 return output
5336 class LUFailoverInstance(LogicalUnit):
5337 """Failover an instance.
5340 HPATH = "instance-failover"
5341 HTYPE = constants.HTYPE_INSTANCE
5342 _OP_PARAMS = [
5343   _PInstanceName,
5344   ("ignore_consistency", False, _TBool),
5345   _PShutdownTimeout,
5346   ]
5347 REQ_BGL = False
5349 def ExpandNames(self):
5350 self._ExpandAndLockInstance()
5351 self.needed_locks[locking.LEVEL_NODE] = []
5352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5354 def DeclareLocks(self, level):
5355 if level == locking.LEVEL_NODE:
5356 self._LockInstancesNodes()
5358 def BuildHooksEnv(self):
5361 This runs on master, primary and secondary nodes of the instance.
5364 instance = self.instance
5365 source_node = instance.primary_node
5366 target_node = instance.secondary_nodes[0]
5367 env = {
5368   "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5369   "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5370   "OLD_PRIMARY": source_node,
5371   "OLD_SECONDARY": target_node,
5372   "NEW_PRIMARY": target_node,
5373   "NEW_SECONDARY": source_node,
5374   }
5375 env.update(_BuildInstanceHookEnvByObject(self, instance))
5376 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5377 nl_post = list(nl)
5378 nl_post.append(source_node)
5379 return env, nl, nl_post
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5384 This checks that the instance is in the cluster.
5387 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5388 assert self.instance is not None, \
5389 "Cannot retrieve locked instance %s" % self.op.instance_name
5391 bep = self.cfg.GetClusterInfo().FillBE(instance)
5392 if instance.disk_template not in constants.DTS_NET_MIRROR:
5393 raise errors.OpPrereqError("Instance's disk layout is not"
5394                            " network mirrored, cannot failover.",
5395                            errors.ECODE_STATE)
5397 secondary_nodes = instance.secondary_nodes
5398 if not secondary_nodes:
5399 raise errors.ProgrammerError("no secondary node but using "
5400 "a mirrored disk template")
5402 target_node = secondary_nodes[0]
5403 _CheckNodeOnline(self, target_node)
5404 _CheckNodeNotDrained(self, target_node)
5405 if instance.admin_up:
5406 # check memory requirements on the secondary node
5407 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5408 instance.name, bep[constants.BE_MEMORY],
5409 instance.hypervisor)
5411 self.LogInfo("Not checking memory on the secondary node as"
5412 " instance will not be started")
5414 # check bridge existence
5415 _CheckInstanceBridgesExist(self, instance, node=target_node)
5417 def Exec(self, feedback_fn):
5418 """Failover an instance.
5420 The failover is done by shutting it down on its present node and
5421 starting it on the secondary.
5424 instance = self.instance
5426 source_node = instance.primary_node
5427 target_node = instance.secondary_nodes[0]
5429 if instance.admin_up:
5430 feedback_fn("* checking disk consistency between source and target")
5431 for dev in instance.disks:
5432 # for drbd, these are drbd over lvm
5433 if not _CheckDiskConsistency(self, dev, target_node, False):
5434 if not self.op.ignore_consistency:
5435 raise errors.OpExecError("Disk %s is degraded on target node,"
5436 " aborting failover." % dev.iv_name)
5438 feedback_fn("* not checking disk consistency as instance is not running")
5440 feedback_fn("* shutting down instance on source node")
5441 logging.info("Shutting down instance %s on node %s",
5442 instance.name, source_node)
5444 result = self.rpc.call_instance_shutdown(source_node, instance,
5445 self.op.shutdown_timeout)
5446 msg = result.fail_msg
5447 if msg:
5448   if self.op.ignore_consistency:
5449     self.proc.LogWarning("Could not shutdown instance %s on node %s."
5450                          " Proceeding anyway. Please make sure node"
5451                          " %s is down. Error details: %s",
5452                          instance.name, source_node, source_node, msg)
5453   else:
5454     raise errors.OpExecError("Could not shutdown instance %s on"
5455                              " node %s: %s" %
5456                              (instance.name, source_node, msg))
5458 feedback_fn("* deactivating the instance's disks on source node")
5459 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5460 raise errors.OpExecError("Can't shut down the instance's disks.")
5462 instance.primary_node = target_node
5463 # distribute new instance config to the other nodes
5464 self.cfg.Update(instance, feedback_fn)
5466 # Only start the instance if it's marked as up
5467 if instance.admin_up:
5468 feedback_fn("* activating the instance's disks on target node")
5469 logging.info("Starting instance %s on node %s",
5470 instance.name, target_node)
5472 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5473                                      ignore_secondaries=True)
5474 if not disks_ok:
5475   _ShutdownInstanceDisks(self, instance)
5476   raise errors.OpExecError("Can't activate the instance's disks")
5478 feedback_fn("* starting the instance on the target node")
5479 result = self.rpc.call_instance_start(target_node, instance, None, None)
5480 msg = result.fail_msg
5481 if msg:
5482   _ShutdownInstanceDisks(self, instance)
5483 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5484 (instance.name, target_node, msg))
5487 class LUMigrateInstance(LogicalUnit):
5488 """Migrate an instance.
5490 This is migration without shutting down, compared to the failover,
5491 which is done with shutdown.
5494 HPATH = "instance-migrate"
5495 HTYPE = constants.HTYPE_INSTANCE
5496 _OP_PARAMS = [
5497   _PInstanceName,
5498   _PMigrationMode,
5499   ("cleanup", False, _TBool),
5500   ]
5501 REQ_BGL = False
5504 def ExpandNames(self):
5505 self._ExpandAndLockInstance()
5507 self.needed_locks[locking.LEVEL_NODE] = []
5508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5510 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5511                                    self.op.cleanup)
5512 self.tasklets = [self._migrater]
5514 def DeclareLocks(self, level):
5515 if level == locking.LEVEL_NODE:
5516 self._LockInstancesNodes()
5518 def BuildHooksEnv(self):
5521 This runs on master, primary and secondary nodes of the instance.
5524 instance = self._migrater.instance
5525 source_node = instance.primary_node
5526 target_node = instance.secondary_nodes[0]
5527 env = _BuildInstanceHookEnvByObject(self, instance)
5528 env["MIGRATE_LIVE"] = self._migrater.live
5529 env["MIGRATE_CLEANUP"] = self.op.cleanup
5530 env.update({
5531   "OLD_PRIMARY": source_node,
5532   "OLD_SECONDARY": target_node,
5533   "NEW_PRIMARY": target_node,
5534   "NEW_SECONDARY": source_node,
5535   })
5536 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5537 nl_post = list(nl)
5538 nl_post.append(source_node)
5539 return env, nl, nl_post
5542 class LUMoveInstance(LogicalUnit):
5543 """Move an instance by data-copying.
5546 HPATH = "instance-move"
5547 HTYPE = constants.HTYPE_INSTANCE
5548 _OP_PARAMS = [
5549   _PInstanceName,
5550   ("target_node", _NoDefault, _TNonEmptyString),
5551   _PShutdownTimeout,
5552   ]
5553 REQ_BGL = False
5555 def ExpandNames(self):
5556 self._ExpandAndLockInstance()
5557 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5558 self.op.target_node = target_node
5559 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5560 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5562 def DeclareLocks(self, level):
5563 if level == locking.LEVEL_NODE:
5564 self._LockInstancesNodes(primary_only=True)
5566 def BuildHooksEnv(self):
5569 This runs on master, primary and secondary nodes of the instance.
5571 """
5572 env = {
5573   "TARGET_NODE": self.op.target_node,
5574   "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5575   }
5576 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5577 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5578                                    self.op.target_node]
5579 return env, nl, nl
5581 def CheckPrereq(self):
5582 """Check prerequisites.
5584 This checks that the instance is in the cluster.
5587 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5588 assert self.instance is not None, \
5589 "Cannot retrieve locked instance %s" % self.op.instance_name
5591 node = self.cfg.GetNodeInfo(self.op.target_node)
5592 assert node is not None, \
5593 "Cannot retrieve locked node %s" % self.op.target_node
5595 self.target_node = target_node = node.name
5597 if target_node == instance.primary_node:
5598 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5599 (instance.name, target_node),
5602 bep = self.cfg.GetClusterInfo().FillBE(instance)
5604 for idx, dsk in enumerate(instance.disks):
5605 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5606 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5607 " cannot copy" % idx, errors.ECODE_STATE)
5609 _CheckNodeOnline(self, target_node)
5610 _CheckNodeNotDrained(self, target_node)
5612 if instance.admin_up:
5613 # check memory requirements on the secondary node
5614 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5615 instance.name, bep[constants.BE_MEMORY],
5616 instance.hypervisor)
5618 self.LogInfo("Not checking memory on the secondary node as"
5619 " instance will not be started")
5621 # check bridge existence
5622 _CheckInstanceBridgesExist(self, instance, node=target_node)
5624 def Exec(self, feedback_fn):
5625 """Move an instance.
5627 The move is done by shutting it down on its present node, copying
5628 the data over (slow) and starting it on the new node.
5631 instance = self.instance
5633 source_node = instance.primary_node
5634 target_node = self.target_node
5636 self.LogInfo("Shutting down instance %s on source node %s",
5637 instance.name, source_node)
5639 result = self.rpc.call_instance_shutdown(source_node, instance,
5640 self.op.shutdown_timeout)
5641 msg = result.fail_msg
5642 if msg:
5643   if self.op.ignore_consistency:
5644     self.proc.LogWarning("Could not shutdown instance %s on node %s."
5645                          " Proceeding anyway. Please make sure node"
5646                          " %s is down. Error details: %s",
5647                          instance.name, source_node, source_node, msg)
5648   else:
5649     raise errors.OpExecError("Could not shutdown instance %s on"
5650                              " node %s: %s" %
5651                              (instance.name, source_node, msg))
5653 # create the target disks
5654 try:
5655   _CreateDisks(self, instance, target_node=target_node)
5656 except errors.OpExecError:
5657   self.LogWarning("Device creation failed, reverting...")
5658   try:
5659     _RemoveDisks(self, instance, target_node=target_node)
5660   finally:
5661     self.cfg.ReleaseDRBDMinors(instance.name)
5662     raise
5664 cluster_name = self.cfg.GetClusterInfo().cluster_name
5666 errs = []
5667 # activate, get path, copy the data over
5668 for idx, disk in enumerate(instance.disks):
5669   self.LogInfo("Copying data for disk %d", idx)
5670   result = self.rpc.call_blockdev_assemble(target_node, disk,
5671                                            instance.name, True)
5672   if result.fail_msg:
5673     self.LogWarning("Can't assemble newly created disk %d: %s",
5674                     idx, result.fail_msg)
5675     errs.append(result.fail_msg)
5676     break
5677   dev_path = result.payload
5678   result = self.rpc.call_blockdev_export(source_node, disk,
5679                                          target_node, dev_path,
5680                                          cluster_name)
5681   if result.fail_msg:
5682     self.LogWarning("Can't copy data over for disk %d: %s",
5683                     idx, result.fail_msg)
5684     errs.append(result.fail_msg)
5685     break
5687 if errs:
5688   self.LogWarning("Some disks failed to copy, aborting")
5689   try:
5690     _RemoveDisks(self, instance, target_node=target_node)
5691   finally:
5692     self.cfg.ReleaseDRBDMinors(instance.name)
5693     raise errors.OpExecError("Errors during disk copy: %s" %
5694                              (",".join(errs),))
5696 instance.primary_node = target_node
5697 self.cfg.Update(instance, feedback_fn)
5699 self.LogInfo("Removing the disks on the original node")
5700 _RemoveDisks(self, instance, target_node=source_node)
5702 # Only start the instance if it's marked as up
5703 if instance.admin_up:
5704 self.LogInfo("Starting instance %s on node %s",
5705 instance.name, target_node)
5707 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5708                                      ignore_secondaries=True)
5709 if not disks_ok:
5710   _ShutdownInstanceDisks(self, instance)
5711   raise errors.OpExecError("Can't activate the instance's disks")
5713 result = self.rpc.call_instance_start(target_node, instance, None, None)
5714 msg = result.fail_msg
5715 if msg:
5716   _ShutdownInstanceDisks(self, instance)
5717 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5718 (instance.name, target_node, msg))
5721 class LUMigrateNode(LogicalUnit):
5722 """Migrate all instances from a node.
5725 HPATH = "node-migrate"
5726 HTYPE = constants.HTYPE_NODE
5733 def ExpandNames(self):
5734 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5736 self.needed_locks = {
5737 locking.LEVEL_NODE: [self.op.node_name],
5738 }
5740 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5742 # Create tasklets for migrating instances for all instances on this node
5743 names = []
5744 tasklets = []
5746 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5747 logging.debug("Migrating instance %s", inst.name)
5748 names.append(inst.name)
5750 tasklets.append(TLMigrateInstance(self, inst.name, False))
5752 self.tasklets = tasklets
5754 # Declare instance locks
5755 self.needed_locks[locking.LEVEL_INSTANCE] = names
5757 def DeclareLocks(self, level):
5758 if level == locking.LEVEL_NODE:
5759 self._LockInstancesNodes()
5761 def BuildHooksEnv(self):
5764 This runs on the master, the primary and all the secondaries.
5768 "NODE_NAME": self.op.node_name,
5771 nl = [self.cfg.GetMasterNode()]
5773 return (env, nl, nl)
5776 class TLMigrateInstance(Tasklet):
5777 """Tasklet class for instance migration.
5780 @ivar live: whether the migration will be done live or non-live;
5781 this variable is initialized only after CheckPrereq has run
5784 def __init__(self, lu, instance_name, cleanup):
5785 """Initializes this class.
5788 Tasklet.__init__(self, lu)
5791 self.instance_name = instance_name
5792 self.cleanup = cleanup
5793 self.live = False # will be overridden later
5795 def CheckPrereq(self):
5796 """Check prerequisites.
5798 This checks that the instance is in the cluster.
5801 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5802 instance = self.cfg.GetInstanceInfo(instance_name)
5803 assert instance is not None
5805 if instance.disk_template != constants.DT_DRBD8:
5806 raise errors.OpPrereqError("Instance's disk layout is not"
5807 " drbd8, cannot migrate.", errors.ECODE_STATE)
5809 secondary_nodes = instance.secondary_nodes
5810 if not secondary_nodes:
5811 raise errors.ConfigurationError("No secondary node but using"
5812 " drbd8 disk template")
5814 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5816 target_node = secondary_nodes[0]
5817 # check memory requirements on the secondary node
5818 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5819 instance.name, i_be[constants.BE_MEMORY],
5820 instance.hypervisor)
5822 # check bridge existence
5823 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5825 if not self.cleanup:
5826 _CheckNodeNotDrained(self.lu, target_node)
5827 result = self.rpc.call_instance_migratable(instance.primary_node,
5828                                            instance)
5829 result.Raise("Can't migrate, please use failover",
5830 prereq=True, ecode=errors.ECODE_STATE)
5832 self.instance = instance
5834 if self.lu.op.mode is None:
5835 # read the default value from the hypervisor
5836 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5837 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5839 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
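# Worked example of the mode resolution above (illustrative values): with
# op.mode unset, the effective hypervisor parameters decide; e.g. if
# i_hv[constants.HV_MIGRATION_MODE] == constants.HT_MIGRATION_LIVE, then
# self.live is True and the migration is attempted without shutting the
# instance down.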
5841 def _WaitUntilSync(self):
5842 """Poll with custom rpc for disk sync.
5844 This uses our own step-based rpc call.
5847 self.feedback_fn("* wait until resync is done")
5848 all_done = False
5849 while not all_done:
5850   all_done = True
5851   result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5852                                         self.nodes_ip,
5853                                         self.instance.disks)
5854   min_percent = 100
5855   for node, nres in result.items():
5856     nres.Raise("Cannot resync disks on node %s" % node)
5857     node_done, node_percent = nres.payload
5858     all_done = all_done and node_done
5859     if node_percent is not None:
5860       min_percent = min(min_percent, node_percent)
5861   if not all_done:
5862     if min_percent < 100:
5863       self.feedback_fn(" - progress: %.1f%%" % min_percent)
5864     time.sleep(2)
5866 def _EnsureSecondary(self, node):
5867 """Demote a node to secondary.
5870 self.feedback_fn("* switching node %s to secondary mode" % node)
5872 for dev in self.instance.disks:
5873 self.cfg.SetDiskID(dev, node)
5875 result = self.rpc.call_blockdev_close(node, self.instance.name,
5876 self.instance.disks)
5877 result.Raise("Cannot change disk to secondary on node %s" % node)
5879 def _GoStandalone(self):
5880 """Disconnect from the network.
5883 self.feedback_fn("* changing into standalone mode")
5884 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5885 self.instance.disks)
5886 for node, nres in result.items():
5887 nres.Raise("Cannot disconnect disks node %s" % node)
5889 def _GoReconnect(self, multimaster):
5890 """Reconnect to the network.
5896 msg = "single-master"
5897 self.feedback_fn("* changing disks into %s mode" % msg)
5898 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5899 self.instance.disks,
5900 self.instance.name, multimaster)
5901 for node, nres in result.items():
5902 nres.Raise("Cannot change disks config on node %s" % node)
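  # The three helpers above are always used as a fixed sequence when the
  # DRBD network configuration has to be rebuilt; a minimal sketch of the
  # canonical ordering (only the multimaster flag varies between callers):
  #
  #   self._EnsureSecondary(node)   # close devices on the demoted node
  #   self._GoStandalone()          # drop the current network config
  #   self._GoReconnect(False)      # reattach in single-master mode
  #   self._WaitUntilSync()         # wait for the resync to finish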
5904 def _ExecCleanup(self):
5905 """Try to cleanup after a failed migration.
5907 The cleanup is done by:
5908 - check that the instance is running only on one node
5909 (and update the config if needed)
5910 - change disks on its secondary node to secondary
5911 - wait until disks are fully synchronized
5912 - disconnect from the network
5913 - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
5917 instance = self.instance
5918 target_node = self.target_node
5919 source_node = self.source_node
5921 # check running on only one node
5922 self.feedback_fn("* checking where the instance actually runs"
5923 " (if this hangs, the hypervisor might be in"
5925 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5926 for node, result in ins_l.items():
5927 result.Raise("Can't contact node %s" % node)
5929 runningon_source = instance.name in ins_l[source_node].payload
5930 runningon_target = instance.name in ins_l[target_node].payload
5932 if runningon_source and runningon_target:
5933 raise errors.OpExecError("Instance seems to be running on two nodes,"
5934 " or the hypervisor is confused. You will have"
5935 " to ensure manually that it runs only on one"
5936 " and restart this operation.")
5938 if not (runningon_source or runningon_target):
5939 raise errors.OpExecError("Instance does not seem to be running at all."
5940 " In this case, it's safer to repair by"
5941 " running 'gnt-instance stop' to ensure disk"
5942 " shutdown, and then restarting it.")
5944 if runningon_target:
5945 # the migration has actually succeeded, we need to update the config
5946 self.feedback_fn("* instance running on secondary node (%s),"
5947 " updating config" % target_node)
5948 instance.primary_node = target_node
5949 self.cfg.Update(instance, self.feedback_fn)
5950 demoted_node = source_node
5952 self.feedback_fn("* instance confirmed to be running on its"
5953 " primary node (%s)" % source_node)
5954 demoted_node = target_node
    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
5964 self._GoReconnect(False)
5965 self._WaitUntilSync()
5967 self.feedback_fn("* done")
5969 def _RevertDiskStatus(self):
5970 """Try to revert the disk status after a failed migration.
5973 target_node = self.target_node
5975 self._EnsureSecondary(target_node)
5976 self._GoStandalone()
5977 self._GoReconnect(False)
5978 self._WaitUntilSync()
5979 except errors.OpExecError, err:
5980 self.lu.LogWarning("Migration failed and I can't reconnect the"
5981 " drives: error '%s'\n"
5982 "Please look and recover the instance status" %
5985 def _AbortMigration(self):
5986 """Call the hypervisor code to abort a started migration.
5989 instance = self.instance
5990 target_node = self.target_node
5991 migration_info = self.migration_info
5993 abort_result = self.rpc.call_finalize_migration(target_node,
5997 abort_msg = abort_result.fail_msg
5999 logging.error("Aborting migration failed on target node %s: %s",
6000 target_node, abort_msg)
6001 # Don't raise an exception here, as we stil have to try to revert the
6002 # disk status, even if this step failed.
6004 def _ExecMigration(self):
6005 """Migrate an instance.
6007 The migrate is done by:
6008 - change the disks into dual-master mode
6009 - wait until disks are fully synchronized again
6010 - migrate the instance
6011 - change disks on the new secondary node (the old primary) to secondary
6012 - wait until disks are fully synchronized
      - change disks into single-master mode

    """
6016 instance = self.instance
6017 target_node = self.target_node
6018 source_node = self.source_node
6020 self.feedback_fn("* checking disk consistency between source and target")
6021 for dev in instance.disks:
6022 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6023 raise errors.OpExecError("Disk %s is degraded or not fully"
6024 " synchronized on target node,"
6025 " aborting migrate." % dev.iv_name)
6027 # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload
6038 # Then switch the disks to master/master mode
6039 self._EnsureSecondary(target_node)
6040 self._GoStandalone()
6041 self._GoReconnect(True)
6042 self._WaitUntilSync()
6044 self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))
6060 self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
6076 instance.primary_node = target_node
6077 # distribute new instance config to the other nodes
6078 self.cfg.Update(instance, self.feedback_fn)
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)
6091 self._EnsureSecondary(source_node)
6092 self._WaitUntilSync()
6093 self._GoStandalone()
6094 self._GoReconnect(False)
6095 self._WaitUntilSync()
6097 self.feedback_fn("* done")
6099 def Exec(self, feedback_fn):
6100 """Perform the migration.
6103 feedback_fn("Migrating instance %s" % self.instance.name)
6105 self.feedback_fn = feedback_fn
6107 self.source_node = self.instance.primary_node
6108 self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
6121 def _CreateBlockDev(lu, node, instance, device, force_create,
6123 """Create a tree of block devices on a given node.
6125 If this device type has to be created on secondaries, create it and
6128 If not, just recurse to children keeping the same 'force' value.
6130 @param lu: the lu on whose behalf we execute
6131 @param node: the node on which to create the device
6132 @type instance: L{objects.Instance}
6133 @param instance: the instance which owns the device
6134 @type device: L{objects.Disk}
6135 @param device: the device to create
6136 @type force_create: boolean
6137 @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
6139 CreateOnSecondary() attribute
6140 @param info: the extra 'metadata' we should attach to the device
6141 (this will be represented as a LVM tag)
6142 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
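# Illustrative sketch only: for a DRBD8 disk the recursion above first
# creates the two LV children (data and metadata) on a node and then the
# DRBD device on top of them; a typical caller does something like
#
#   for node in all_nodes:
#     f_create = (node == instance.primary_node)
#     _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)
#
# which mirrors the loop in _CreateDisks() further below.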
6163 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6164 """Create a single block device on a given node.
  This will not recurse over children of the device, so they must be
  created in advance.
6169 @param lu: the lu on whose behalf we execute
6170 @param node: the node on which to create the device
6171 @type instance: L{objects.Instance}
6172 @param instance: the instance which owns the device
6173 @type device: L{objects.Disk}
6174 @param device: the device to create
6175 @param info: the extra 'metadata' we should attach to the device
6176 (this will be represented as a LVM tag)
6177 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
6184 lu.cfg.SetDiskID(device, node)
6185 result = lu.rpc.call_blockdev_create(node, device, device.size,
6186 instance.name, force_open, info)
6187 result.Raise("Can't create block device %s on"
6188 " node %s for instance %s" % (device, node, instance.name))
6189 if device.physical_id is None:
6190 device.physical_id = result.payload
6193 def _GenerateUniqueNames(lu, exts):
6194 """Generate a suitable LV name.
  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
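# Example (illustrative; the UUID-style prefixes are made up):
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   # -> ["2f5d0dcb-....disk0_data", "8c3a1e6f-....disk0_meta"]
#
# Each extension gets its own unique id, so the two names do not share a
# common prefix.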
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
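# Illustrative use (node names, LV names and minors here are hypothetical;
# in _GenerateDiskTemplate() below they come from the config via
# AllocateDRBDMinor and _GenerateUniqueNames):
#
#   disk = _GenerateDRBD8Branch(lu, "node1", "node2", 10240,
#                               ["x_data", "x_meta"], "disk/0", 0, 0)
#
# The result is one LD_DRBD8 disk with two LD_LV children (data plus a
# fixed 128 MiB metadata volume).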
6227 def _GenerateDiskTemplate(lu, template_name,
6228 instance_name, primary_node,
6229 secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
6235 #TODO: compute space requirements
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
6243 if len(secondary_nodes) != 0:
6244 raise errors.ProgrammerError("Wrong template configuration")
6246 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6247 for i in range(disk_count)])
6248 for idx, disk in enumerate(disk_info):
6249 disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
6255 elif template_name == constants.DT_DRBD8:
6256 if len(secondary_nodes) != 1:
6257 raise errors.ProgrammerError("Wrong template configuration")
6258 remote_node = secondary_nodes[0]
6259 minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
6267 for idx, disk in enumerate(disk_info):
6268 disk_index = idx + base_index
6269 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6270 disk["size"], names[idx*2:idx*2+2],
6271 "disk/%d" % disk_index,
6272 minors[idx*2], minors[idx*2+1])
6273 disk_dev.mode = disk["mode"]
6274 disks.append(disk_dev)
6275 elif template_name == constants.DT_FILE:
6276 if len(secondary_nodes) != 0:
6277 raise errors.ProgrammerError("Wrong template configuration")
6279 _RequireFileStorage()
6281 for idx, disk in enumerate(disk_info):
6282 disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
6295 def _GetInstanceInfoText(instance):
6296 """Compute that text that should be added to the disk's metadata.
6299 return "originstname+%s" % instance.name
6302 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6303 """Create all disks for an instance.
6305 This abstracts away some work from AddInstance.
6307 @type lu: L{LogicalUnit}
6308 @param lu: the logical unit on whose behalf we execute
6309 @type instance: L{objects.Instance}
6310 @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
6313 @type target_node: string
6314 @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]
6327 if instance.disk_template == constants.DT_FILE:
6328 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6329 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6331 result.Raise("Failed to create directory '%s' on"
6332 " node %s" % (file_storage_dir, pnode))
6334 # Note: this needs to be kept in sync with adding of disks in
6335 # LUSetInstanceParams
6336 for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
6339 logging.info("Creating volume %s for instance %s",
6340 device.iv_name, instance.name)
6342 for node in all_nodes:
6343 f_create = node == pnode
6344 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6347 def _RemoveDisks(lu, instance, target_node=None):
6348 """Remove all disks for an instance.
6350 This abstracts away some work from `AddInstance()` and
6351 `RemoveInstance()`. Note that in case some of the devices couldn't
6352 be removed, the removal will continue with the other ones (compare
6353 with `_CreateDisks()`).
6355 @type lu: L{LogicalUnit}
6356 @param lu: the logical unit on whose behalf we execute
6357 @type instance: L{objects.Instance}
6358 @param instance: the instance whose disks we should remove
6359 @type target_node: string
6360 @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6396 def _ComputeDiskSize(disk_template, disks):
6397 """Compute disk size requirements in the volume group
6400 # Required free disk space as a function of disk and swap space
6402 constants.DT_DISKLESS: None,
6403 constants.DT_PLAIN: sum(d["size"] for d in disks),
6404 # 128 MB are added for drbd metadata for each disk
6405 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6406 constants.DT_FILE: None,
6409 if disk_template not in req_size_dict:
6410 raise errors.ProgrammerError("Disk template '%s' size requirement"
6411 " is unknown" % disk_template)
6413 return req_size_dict[disk_template]
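# Worked example (sizes in MiB): for two disks of 10240 and 512,
# DT_PLAIN needs 10240 + 512 = 10752 in the volume group, while
# DT_DRBD8 needs (10240 + 128) + (512 + 128) = 11008 because each disk
# carries a 128 MiB DRBD metadata volume; DT_FILE and DT_DISKLESS
# consume no volume group space at all (None).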
6416 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6417 """Hypervisor parameter validation.
6419 This function abstract the hypervisor parameter validation to be
6420 used in both instance create and instance modify.
6422 @type lu: L{LogicalUnit}
6423 @param lu: the logical unit for which we check
6424 @type nodenames: list
6425 @param nodenames: the list of nodes on which we should check
6426 @type hvname: string
6427 @param hvname: the name of the hypervisor we should use
6428 @type hvparams: dict
6429 @param hvparams: the parameters which we need to check
6430 @raise errors.OpPrereqError: if the parameters are not valid
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6443 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6444 """OS parameters validation.
6446 @type lu: L{LogicalUnit}
6447 @param lu: the logical unit for which we check
6448 @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
6451 @type nodenames: list
6452 @param nodenames: the list of nodes on which we should check
6453 @type osname: string
  @param osname: the name of the OS we should use
6455 @type osparams: dict
6456 @param osparams: the parameters which we need to check
6457 @raise errors.OpPrereqError: if the parameters are not valid
6460 result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
6463 for node, nres in result.items():
6464 # we don't check for offline cases since this should be run only
6465 # against the master node and/or an instance's nodes
6466 nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6472 class LUCreateInstance(LogicalUnit):
6473 """Create an instance.
6476 HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6481 ("start", True, _TBool),
6482 ("wait_for_sync", True, _TBool),
6483 ("ip_check", True, _TBool),
6484 ("name_check", True, _TBool),
6485 ("disks", _NoDefault, _TListOf(_TDict)),
6486 ("nics", _NoDefault, _TListOf(_TDict)),
6487 ("hvparams", _EmptyDict, _TDict),
6488 ("beparams", _EmptyDict, _TDict),
6489 ("osparams", _EmptyDict, _TDict),
6490 ("no_install", None, _TMaybeBool),
6491 ("os_type", None, _TMaybeString),
6492 ("force_variant", False, _TBool),
6493 ("source_handshake", None, _TOr(_TList, _TNone)),
6494 ("source_x509_ca", None, _TOr(_TList, _TNone)),
6495 ("source_instance_name", None, _TMaybeString),
6496 ("src_node", None, _TMaybeString),
6497 ("src_path", None, _TMaybeString),
6498 ("pnode", None, _TMaybeString),
6499 ("snode", None, _TMaybeString),
6500 ("iallocator", None, _TMaybeString),
6501 ("hypervisor", None, _TMaybeString),
6502 ("disk_template", _NoDefault, _CheckDiskTemplate),
6503 ("identify_defaults", False, _TBool),
6504 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6505 ("file_storage_dir", None, _TMaybeString),
6506 ("dry_run", False, _TBool),
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
6516 if self.op.no_install and self.op.start:
6517 self.LogInfo("No-installation mode selected, disabling startup")
6518 self.op.start = False
6519 # validate/normalize the instance name
6520 self.op.instance_name = \
6521 netutils.HostInfo.NormalizeName(self.op.instance_name)
6523 if self.op.ip_check and not self.op.name_check:
6524 # TODO: make the ip check more flexible and not depend on the name check
6525 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6528 # check nics' parameter names
6529 for nic in self.op.nics:
6530 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6532 # check disks. parameter names and consistent adopt/no-adopt strategy
6533 has_adopt = has_no_adopt = False
6534 for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)

    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6545 raise errors.OpPrereqError("Disk adoption is not supported for the"
6546 " '%s' disk template" %
6547 self.op.disk_template,
6549 if self.op.iallocator is not None:
6550 raise errors.OpPrereqError("Disk adoption not allowed with an"
6551 " iallocator script", errors.ECODE_INVAL)
6552 if self.op.mode == constants.INSTANCE_IMPORT:
6553 raise errors.OpPrereqError("Disk adoption not allowed for"
6554 " instance import", errors.ECODE_INVAL)
6556 self.adopt_disks = has_adopt
6558 # instance name verification
6559 if self.op.name_check:
6560 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6561 self.op.instance_name = self.hostname1.name
6562 # used in CheckPrereq for ip ping check
6563 self.check_ip = self.hostname1.ip
6564 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6565 raise errors.OpPrereqError("Remote imports require names to be checked" %
6568 self.check_ip = None
6570 # file storage checks
6571 if (self.op.file_driver and
6572 not self.op.file_driver in constants.FILE_DRIVER):
6573 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6574 self.op.file_driver, errors.ECODE_INVAL)
6576 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6577 raise errors.OpPrereqError("File storage directory path not absolute",
6580 ### Node/iallocator related checks
6581 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6583 self._cds = _GetClusterDomainSecret()
6585 if self.op.mode == constants.INSTANCE_IMPORT:
6586 # On import force_variant must be True, because if we forced it at
6587 # initial install, our only chance when importing it back is that it
6589 self.op.force_variant = True
6591 if self.op.no_install:
6592 self.LogInfo("No-installation mode has no effect during import")
6594 elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
6602 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6603 # Check handshake to ensure both clusters have the same domain secret
6604 src_handshake = self.op.source_handshake
6605 if not src_handshake:
6606 raise errors.OpPrereqError("Missing source handshake",
6609 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6612 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6615 # Load and check source CA
6616 self.source_x509_ca_pem = self.op.source_x509_ca
6617 if not self.source_x509_ca_pem:
6618 raise errors.OpPrereqError("Missing source X509 CA",
6622 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6624 except OpenSSL.crypto.Error, err:
6625 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6626 (err, ), errors.ECODE_INVAL)
6628 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6629 if errcode is not None:
6630 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6633 self.source_x509_ca = cert
6635 src_instance_name = self.op.source_instance_name
6636 if not src_instance_name:
6637 raise errors.OpPrereqError("Missing source instance name",
6640 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6641 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6644 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6645 self.op.mode, errors.ECODE_INVAL)
6647 def ExpandNames(self):
6648 """ExpandNames for CreateInstance.
6650 Figure out the right locks for instance creation.
6653 self.needed_locks = {}
6655 instance_name = self.op.instance_name
6656 # this is just a preventive check, but someone might still add this
6657 # instance in the meantime, and creation will fail at lock-add time
6658 if instance_name in self.cfg.GetInstanceList():
6659 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6660 instance_name, errors.ECODE_EXISTS)
6662 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6664 if self.op.iallocator:
6665 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6668 nodelist = [self.op.pnode]
6669 if self.op.snode is not None:
6670 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6671 nodelist.append(self.op.snode)
6672 self.needed_locks[locking.LEVEL_NODE] = nodelist
6674 # in case of import lock the source node too
6675 if self.op.mode == constants.INSTANCE_IMPORT:
6676 src_node = self.op.src_node
6677 src_path = self.op.src_path
6679 if src_path is None:
6680 self.op.src_path = src_path = self.op.instance_name
6682 if src_node is None:
6683 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6684 self.op.src_node = None
6685 if os.path.isabs(src_path):
6686 raise errors.OpPrereqError("Importing an instance from an absolute"
6687 " path requires a source node option.",
6690 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6691 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6692 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6693 if not os.path.isabs(src_path):
6694 self.op.src_path = src_path = \
6695 utils.PathJoin(constants.EXPORT_DIR, src_path)
6697 def _RunAllocator(self):
6698 """Run the allocator based on input opcode.
6701 nics = [n.ToDict() for n in self.nics]
6702 ial = IAllocator(self.cfg, self.rpc,
6703 mode=constants.IALLOCATOR_MODE_ALLOC,
6704 name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
6722 if len(ial.result) != ial.required_nodes:
6723 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6724 " of nodes (%s), required %s" %
6725 (self.op.iallocator, len(ial.result),
6726 ial.required_nodes), errors.ECODE_FAULT)
6727 self.op.pnode = ial.result[0]
6728 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6729 self.op.instance_name, self.op.iallocator,
6730 utils.CommaJoin(ial.result))
6731 if ial.required_nodes == 2:
6732 self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
6743 if self.op.mode == constants.INSTANCE_IMPORT:
6744 env["SRC_NODE"] = self.op.src_node
6745 env["SRC_PATH"] = self.op.src_path
6746 env["SRC_IMAGES"] = self.src_images
6748 env.update(_BuildInstanceHookEnv(
6749 name=self.op.instance_name,
6750 primary_node=self.op.pnode,
6751 secondary_nodes=self.secondaries,
6752 status=self.op.start,
6753 os_type=self.op.os_type,
6754 memory=self.be_full[constants.BE_MEMORY],
6755 vcpus=self.be_full[constants.BE_VCPUS],
6756 nics=_NICListToTuple(self, self.nics),
6757 disk_template=self.op.disk_template,
6758 disks=[(d["size"], d["mode"]) for d in self.disks],
6761 hypervisor_name=self.op.hypervisor,
6764 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6768 def _ReadExportInfo(self):
6769 """Reads the export information from disk.
6771 It will override the opcode source node and path with the actual
6772 information, if these two were not specified before.
6774 @return: the export information
6777 assert self.op.mode == constants.INSTANCE_IMPORT
6779 src_node = self.op.src_node
6780 src_path = self.op.src_path
6782 if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
6799 _CheckNodeOnline(self, src_node)
6800 result = self.rpc.call_export_info(src_node, src_path)
6801 result.Raise("No export or invalid export found in dir %s" % src_path)
6803 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6804 if not export_info.has_section(constants.INISECT_EXP):
6805 raise errors.ProgrammerError("Corrupted export config",
6806 errors.ECODE_ENVIRON)
6808 ei_version = export_info.get(constants.INISECT_EXP, "version")
6809 if (int(ei_version) != constants.EXPORT_VERSION):
6810 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6811 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
6815 def _ReadExportParams(self, einfo):
6816 """Use export parameters as defaults.
6818 In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    the export carries them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6825 if self.op.disk_template is None:
6826 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
6834 if not self.op.disks:
6835 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6837 # TODO: import the disk iv_name too
6838 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6839 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6840 disks.append({"size": disk_sz})
6841 self.op.disks = disks
6843 raise errors.OpPrereqError("No disk info specified and the export"
6844 " is missing the disk information",
6847 if (not self.op.nics and
6848 einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics
6858 if (self.op.hypervisor is None and
6859 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6860 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6861 if einfo.has_section(constants.INISECT_HYP):
6862 # use the export parameters but do not override the ones
6863 # specified by the user
6864 for name, value in einfo.items(constants.INISECT_HYP):
6865 if name not in self.op.hvparams:
6866 self.op.hvparams[name] = value
6868 if einfo.has_section(constants.INISECT_BEP):
6869 # use the parameters, without overriding
6870 for name, value in einfo.items(constants.INISECT_BEP):
6871 if name not in self.op.beparams:
6872 self.op.beparams[name] = value
6874 # try to read the parameters old style, from the main section
6875 for name in constants.BES_PARAMETERS:
6876 if (name not in self.op.beparams and
6877 einfo.has_option(constants.INISECT_INS, name)):
6878 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6880 if einfo.has_section(constants.INISECT_OSP):
6881 # use the parameters, without overriding
6882 for name, value in einfo.items(constants.INISECT_OSP):
6883 if name not in self.op.osparams:
6884 self.op.osparams[name] = value
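    # Illustrative sketch of an export file as read above (section and
    # option names follow the INISECT_* constants; the values are made up):
    #
    #   [export]
    #   version = 0
    #   os = debian-image
    #
    #   [instance]
    #   disk_count = 1
    #   disk0_size = 10240
    #   nic_count = 1
    #   nic0_mac = aa:00:00:fa:3a:3f
    #
    # Options already set in the opcode always win over these values.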
6886 def _RevertToDefaults(self, cluster):
6887 """Revert the instance parameters to the default values.
6891 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6892 for name in self.op.hvparams.keys():
6893 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6894 del self.op.hvparams[name]
6896 be_defs = cluster.SimpleFillBE({})
6897 for name in self.op.beparams.keys():
6898 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6899 del self.op.beparams[name]
6901 nic_defs = cluster.SimpleFillNIC({})
6902 for nic in self.op.nics:
6903 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6908 for name in self.op.osparams.keys():
6909 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6910 del self.op.osparams[name]
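    # Worked example with illustrative values: if the cluster-wide default
    # for the "memory" backend parameter is 128 and the opcode also says
    # {"memory": 128, "vcpus": 4}, the loops above drop "memory" (it
    # matches the default) and keep "vcpus", so only genuinely
    # instance-specific settings end up stored in the configuration.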
6912 def CheckPrereq(self):
6913 """Check prerequisites.
6916 if self.op.mode == constants.INSTANCE_IMPORT:
6917 export_info = self._ReadExportInfo()
6918 self._ReadExportParams(export_info)
6920 _CheckDiskTemplate(self.op.disk_template)
6922 if (not self.cfg.GetVGName() and
6923 self.op.disk_template not in constants.DTS_NOT_LVM):
6924 raise errors.OpPrereqError("Cluster does not support lvm-based"
6925 " instances", errors.ECODE_STATE)
6927 if self.op.hypervisor is None:
6928 self.op.hypervisor = self.cfg.GetHypervisorType()
6930 cluster = self.cfg.GetClusterInfo()
6931 enabled_hvs = cluster.enabled_hypervisors
6932 if self.op.hypervisor not in enabled_hvs:
6933 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6934 " cluster (%s)" % (self.op.hypervisor,
6935 ",".join(enabled_hvs)),
6938 # check hypervisor parameter syntax (locally)
6939 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6940 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6942 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6943 hv_type.CheckParameterSyntax(filled_hvp)
6944 self.hv_full = filled_hvp
6945 # check that we don't specify global parameters on an instance
6946 _CheckGlobalHvParams(self.op.hvparams)
6948 # fill and remember the beparams dict
6949 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6950 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6952 # build os parameters
6953 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6955 # now that hvp/bep are in final format, let's reset to defaults,
6957 if self.op.identify_defaults:
6958 self._RevertToDefaults(cluster)
6962 for idx, nic in enumerate(self.op.nics):
6963 nic_mode_req = nic.get("mode", None)
6964 nic_mode = nic_mode_req
6965 if nic_mode is None:
6966 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6968 # in routed mode, for the first nic, the default ip is 'auto'
6969 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6970 default_ip_mode = constants.VALUE_AUTO
6972 default_ip_mode = constants.VALUE_NONE
6974 # ip validity checks
6975 ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IsValidIP4(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip
6991 # TODO: check the ip address for uniqueness
6992 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6993 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6996 # MAC address verification
6997 mac = nic.get("mac", constants.VALUE_AUTO)
6998 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7003 except errors.ReservationError:
7004 raise errors.OpPrereqError("MAC address %s already in use"
7005 " in cluster" % mac,
7006 errors.ECODE_NOTUNIQUE)
7008 # bridge verification
7009 bridge = nic.get("bridge", None)
7010 link = nic.get("link", None)
7012 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7013 " at the same time", errors.ECODE_INVAL)
7014 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7015 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7022 nicparams[constants.NIC_MODE] = nic_mode_req
7024 nicparams[constants.NIC_LINK] = link
7026 check_params = cluster.SimpleFillNIC(nicparams)
7027 objects.NIC.CheckParameterSyntax(check_params)
7028 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)
7050 if self.op.mode == constants.INSTANCE_IMPORT:
7052 # Check that the new instance doesn't have less disks than the export
7053 instance_disks = len(self.disks)
7054 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7055 if instance_disks < export_disks:
7056 raise errors.OpPrereqError("Not enough disks to import."
7057 " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)
7072 self.src_images = disk_images
      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
7081 if self.op.instance_name == old_name:
7082 for idx, nic in enumerate(self.nics):
7083 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7084 nic_mac_ini = 'nic%d_mac' % idx
7085 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7087 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7089 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7090 if self.op.ip_check:
7091 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7092 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7093 (self.check_ip, self.op.instance_name),
7094 errors.ECODE_NOTUNIQUE)
7096 #### mac address generation
7097 # By generating here the mac address both the allocator and the hooks get
7098 # the real final mac address rather than the 'auto' or 'generate' value.
7099 # There is a race condition between the generation and the instance object
7100 # creation, which means that we know the mac is valid now, but we're not
7101 # sure it will be when we actually add the instance. If things go bad
7102 # adding the instance will abort because of a duplicate mac, and the
7103 # creation job will fail.
7104 for nic in self.nics:
7105 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7110 if self.op.iallocator is not None:
7111 self._RunAllocator()
7113 #### node related checks
7115 # check primary node
7116 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7117 assert self.pnode is not None, \
7118 "Cannot retrieve locked node %s" % self.op.pnode
7120 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7121 pnode.name, errors.ECODE_STATE)
7123 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7124 pnode.name, errors.ECODE_STATE)
7126 self.secondaries = []
7128 # mirror node verification
7129 if self.op.disk_template in constants.DTS_NET_MIRROR:
7130 if self.op.snode is None:
7131 raise errors.OpPrereqError("The networked disk templates need"
7132 " a mirror node", errors.ECODE_INVAL)
7133 if self.op.snode == pnode.name:
7134 raise errors.OpPrereqError("The secondary node cannot be the"
7135 " primary node.", errors.ECODE_INVAL)
7136 _CheckNodeOnline(self, self.op.snode)
7137 _CheckNodeNotDrained(self, self.op.snode)
7138 self.secondaries.append(self.op.snode)
7140 nodenames = [pnode.name] + self.secondaries
    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)
7145 # Check lv size requirements, if not adopting
7146 if req_size is not None and not self.adopt_disks:
7147 _CheckNodesFreeDisk(self, nodenames, req_size)
7149 if self.adopt_disks: # instead, we must check the adoption data
7150 all_lvs = set([i["adopt"] for i in self.disks])
7151 if len(all_lvs) != len(self.disks):
7152 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7154 for lv_name in all_lvs:
7156 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7157 except errors.ReservationError:
7158 raise errors.OpPrereqError("LV named %s used by another instance" %
7159 lv_name, errors.ECODE_NOTUNIQUE)
7161 node_lvs = self.rpc.call_lv_list([pnode.name],
7162 self.cfg.GetVGName())[pnode.name]
7163 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7164 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
7175 # update the size of disk based on what is found
7176 for dsk in self.disks:
7177 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7179 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7181 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7182 # check OS parameters (remotely)
7183 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7185 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
7196 def Exec(self, feedback_fn):
7197 """Create and add the instance to the cluster.
7200 instance = self.op.instance_name
7201 pnode_name = self.pnode.name
7203 ht_kind = self.op.hypervisor
7204 if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None
7209 if constants.ENABLE_FILE_STORAGE:
7210 # this is needed because os.path.join does not accept None arguments
7211 if self.op.file_storage_dir is None:
7212 string_file_storage_dir = ""
7214 string_file_storage_dir = self.op.file_storage_dir
7216 # build the full file storage dir path
7217 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7218 string_file_storage_dir, instance)
7220 file_storage_dir = ""
7222 disks = _GenerateDiskTemplate(self,
7223 self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)
7231 iobj = objects.Instance(name=instance, os=self.op.os_type,
7232 primary_node=pnode_name,
7233 nics=self.nics, disks=disks,
7234 disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
7237 beparams=self.op.beparams,
7238 hvparams=self.op.hvparams,
7239 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )
7243 if self.adopt_disks:
7244 # rename LVs to the newly-generated names; we need to construct
7245 # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7249 rename_to.append(t_dsk.logical_id)
7250 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7251 self.cfg.SetDiskID(t_dsk, pnode_name)
7252 result = self.rpc.call_blockdev_rename(pnode_name,
7253 zip(tmp_disks, rename_to))
7254 result.Raise("Failed to rename adoped LVs")
7256 feedback_fn("* creating instance disks...")
7258 _CreateDisks(self, iobj)
7259 except errors.OpExecError:
7260 self.LogWarning("Device creation failed, reverting...")
7262 _RemoveDisks(self, iobj)
7264 self.cfg.ReleaseDRBDMinors(instance)
7267 feedback_fn("adding instance %s to cluster config" % instance)
7269 self.cfg.AddInstance(iobj, self.proc.GetECId())
7271 # Declare that we don't want to remove the instance lock anymore, as we've
7272 # added the instance to the config
7273 del self.remove_locks[locking.LEVEL_INSTANCE]
7274 # Unlock all the nodes
7275 if self.op.mode == constants.INSTANCE_IMPORT:
7276 nodes_keep = [self.op.src_node]
7277 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7278 if node != self.op.src_node]
7279 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7280 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7282 self.context.glm.release(locking.LEVEL_NODE)
7283 del self.acquired_locks[locking.LEVEL_NODE]
7285 if self.op.wait_for_sync:
7286 disk_abort = not _WaitForSync(self, iobj)
7287 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7288 # make sure the disks are not degraded (still sync-ing is ok)
7290 feedback_fn("* checking mirrors status")
7291 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7296 _RemoveDisks(self, iobj)
7297 self.cfg.RemoveInstance(iobj.name)
7298 # Make sure the instance lock gets removed
7299 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7300 raise errors.OpExecError("There are some degraded disks for"
7303 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7304 if self.op.mode == constants.INSTANCE_CREATE:
7305 if not self.op.no_install:
7306 feedback_fn("* running the instance OS create scripts...")
7307 # FIXME: pass debug option from opcode to backend
7308 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7309 self.op.debug_level)
7310 result.Raise("Could not add os for instance %s"
7311 " on node %s" % (instance, pnode_name))
7313 elif self.op.mode == constants.INSTANCE_IMPORT:
7314 feedback_fn("* running the instance OS import scripts...")
        transfers = []
        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
7335 if not compat.all(import_result):
7336 self.LogWarning("Some disks for instance %s on node %s were not"
7337 " imported successfully" % (instance, pnode_name))
7339 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7340 feedback_fn("* preparing remote import...")
7341 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7342 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7344 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7345 self.source_x509_ca,
7346 self._cds, timeouts)
7347 if not compat.all(disk_results):
7348 # TODO: Should the instance still be started, even if some disks
7349 # failed to import (valid for local imports, too)?
7350 self.LogWarning("Some disks for instance %s on node %s were not"
7351 " imported successfully" % (instance, pnode_name))
7353 # Run rename script on newly imported instance
7354 assert iobj.name == instance
7355 feedback_fn("Running rename script for %s" % instance)
7356 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7357 self.source_instance_name,
7358 self.op.debug_level)
7360 self.LogWarning("Failed to run rename script for %s on node"
7361 " %s: %s" % (instance, pnode_name, result.fail_msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
7370 self.cfg.Update(iobj, feedback_fn)
7371 logging.info("Starting instance %s on node %s", instance, pnode_name)
7372 feedback_fn("* starting instance...")
7373 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7374 result.Raise("Could not start instance")
7376 return list(iobj.all_nodes)
7379 class LUConnectConsole(NoHooksLU):
7380 """Connect to an instance's console.
7382 This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False
7392 def ExpandNames(self):
7393 self._ExpandAndLockInstance()
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7404 _CheckNodeOnline(self, self.instance.primary_node)
7406 def Exec(self, feedback_fn):
7407 """Connect to the console of an instance
7410 instance = self.instance
7411 node = instance.primary_node
7413 node_insts = self.rpc.call_instance_list([node],
7414 [instance.hypervisor])[node]
7415 node_insts.Raise("Can't get node information from %s" % node)
7417 if instance.name not in node_insts.payload:
7418 raise errors.OpExecError("Instance %s is not running." % instance.name)
7420 logging.debug("Connecting to console of %s on %s", instance.name, node)
7422 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7423 cluster = self.cfg.GetClusterInfo()
7424 # beparams and hvparams are passed separately, to avoid editing the
7425 # instance and then saving the defaults in the instance itself.
7426 hvparams = cluster.FillHV(instance)
7427 beparams = cluster.FillBE(instance)
7428 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7431 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
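    # Illustrative result (host name and hypervisor command are made up):
    # for a Xen instance this typically expands to something like
    #   ssh -t root@node1.example.com "xm console web1.example.com"
    # i.e. an SSH invocation of the hypervisor's own console command on
    # the instance's primary node.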
7434 class LUReplaceDisks(LogicalUnit):
7435 """Replace the disks of an instance.
7438 HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
7442 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7443 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7444 ("remote_node", None, _TMaybeString),
7445 ("iallocator", None, _TMaybeString),
7446 ("early_release", False, _TBool),
7450 def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)
7454 def ExpandNames(self):
7455 self._ExpandAndLockInstance()
7457 if self.op.iallocator is not None:
7458 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7460 elif self.op.remote_node is not None:
7461 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7462 self.op.remote_node = remote_node
7464 # Warning: do not remove the locking of the new secondary here
7465 # unless DRBD8.AddChildren is changed to work in parallel;
7466 # currently it doesn't since parallel invocations of
7467 # FindUnusedMinor will conflict
7468 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
7473 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7475 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7476 self.op.iallocator, self.op.remote_node,
7477 self.op.disks, False, self.op.early_release)
7479 self.tasklets = [self.replacer]
7481 def DeclareLocks(self, level):
7482 # If we're not already locking all nodes in the set we have to declare the
7483 # instance's primary/secondary nodes.
7484 if (level == locking.LEVEL_NODE and
7485 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7486 self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl
7510 class TLReplaceDisks(Tasklet):
7511 """Replaces disks for an instance.
7513 Note: Locking is not within the scope of this class.
7516 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7517 disks, delay_iallocator, early_release):
7518 """Initializes this class.
7521 Tasklet.__init__(self, lu)
7524 self.instance_name = instance_name
7526 self.iallocator_name = iallocator_name
7527 self.remote_node = remote_node
7529 self.delay_iallocator = delay_iallocator
7530 self.early_release = early_release
7533 self.instance = None
7534 self.new_node = None
7535 self.target_node = None
7536 self.other_node = None
7537 self.remote_node_info = None
7538 self.node_secondary_ip = None
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
7545 # check for valid parameter combination
7546 if mode == constants.REPLACE_DISK_CHG:
7547 if remote_node is None and iallocator is None:
7548 raise errors.OpPrereqError("When changing the secondary either an"
7549 " iallocator script must be used or the"
7550 " new node given", errors.ECODE_INVAL)
7552 if remote_node is not None and iallocator is not None:
7553 raise errors.OpPrereqError("Give either the iallocator or the new"
7554 " secondary, not both", errors.ECODE_INVAL)
7556 elif remote_node is not None or iallocator is not None:
7557 # Not replacing the secondary
7558 raise errors.OpPrereqError("The iallocator and new node options can"
7559 " only be used when changing the"
7560 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
7575 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7576 " %s" % (iallocator_name, ial.info),
7579 if len(ial.result) != ial.required_nodes:
7580 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7581 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)
7586 remote_node_name = ial.result[0]
7588 lu.LogInfo("Selected new secondary for instance '%s': %s",
7589 instance_name, remote_node_name)
7591 return remote_node_name
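    # Illustrative sketch: in relocation mode the allocator returns a
    # single node name, e.g.
    #
    #   new_node = TLReplaceDisks._RunAllocator(lu, "hail",
    #                                           "web1.example.com",
    #                                           ["node2.example.com"])
    #
    # "hail" and the host names are hypothetical; required_nodes is 1 for
    # IALLOCATOR_MODE_RELOC, hence the single-element result list.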
7593 def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
7597 def CheckPrereq(self):
7598 """Check prerequisites.
7600 This checks that the instance is in the cluster.
7603 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7604 assert instance is not None, \
7605 "Cannot retrieve locked instance %s" % self.instance_name
7607 if instance.disk_template != constants.DT_DRBD8:
7608 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7609 " instances", errors.ECODE_INVAL)
7611 if len(instance.secondary_nodes) != 1:
7612 raise errors.OpPrereqError("The instance has a strange layout,"
7613 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)
7617 if not self.delay_iallocator:
7618 self._CheckPrereq2()
7620 def _CheckPrereq2(self):
7621 """Check prerequisites, second part.
7623 This function should always be part of CheckPrereq. It was separated and is
7624 now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
7629 instance = self.instance
7630 secondary_node = instance.secondary_nodes[0]
7632 if self.iallocator_name is None:
7633 remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)
7638 if remote_node is not None:
7639 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7640 assert self.remote_node_info is not None, \
7641 "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None
7645 if remote_node == self.instance.primary_node:
7646 raise errors.OpPrereqError("The specified node is the primary node of"
7647 " the instance.", errors.ECODE_INVAL)
7649 if remote_node == secondary_node:
7650 raise errors.OpPrereqError("The specified node is already the"
7651 " secondary node of the instance.",
7654 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7655 constants.REPLACE_DISK_CHG):
7656 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7659 if self.mode == constants.REPLACE_DISK_AUTO:
7660 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7661 faulty_secondary = self._FindFaultyDisks(secondary_node)
7663 if faulty_primary and faulty_secondary:
7664 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7665 " one node and cannot be repaired"
7666 " automatically" % self.instance_name,
7670 self.disks = faulty_primary
7671 self.target_node = instance.primary_node
7672 self.other_node = secondary_node
7673 check_nodes = [self.target_node, self.other_node]
7674 elif faulty_secondary:
7675 self.disks = faulty_secondary
7676 self.target_node = secondary_node
7677 self.other_node = instance.primary_node
7678 check_nodes = [self.target_node, self.other_node]
7684 # Non-automatic modes
7685 if self.mode == constants.REPLACE_DISK_PRI:
7686 self.target_node = instance.primary_node
7687 self.other_node = secondary_node
7688 check_nodes = [self.target_node, self.other_node]
7690 elif self.mode == constants.REPLACE_DISK_SEC:
7691 self.target_node = secondary_node
7692 self.other_node = instance.primary_node
7693 check_nodes = [self.target_node, self.other_node]
7695 elif self.mode == constants.REPLACE_DISK_CHG:
7696 self.new_node = remote_node
7697 self.other_node = instance.primary_node
7698 self.target_node = secondary_node
7699 check_nodes = [self.new_node, self.other_node]
7701 _CheckNodeNotDrained(self.lu, remote_node)
7703 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7704 assert old_node_info is not None
7705 if old_node_info.offline and not self.early_release:
7706 # doesn't make sense to delay the release
7707 self.early_release = True
7708 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7709 " early-release mode", secondary_node)
7712 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7715 # If not specified all disks should be replaced
7717 self.disks = range(len(self.instance.disks))
7719 for node in check_nodes:
7720 _CheckNodeOnline(self.lu, node)
7722 # Check whether disks are valid
7723 for disk_idx in self.disks:
7724 instance.FindDisk(disk_idx)
7726 # Get secondary node IP addresses
7729 for node_name in [self.target_node, self.other_node, self.new_node]:
7730 if node_name is not None:
7731 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7733 self.node_secondary_ip = node_2nd_ip
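# Illustrative shape of the resulting map (addresses are hypothetical):
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}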
7735 def Exec(self, feedback_fn):
7736 """Execute disk replacement.
7738 This dispatches the disk replacement to the appropriate handler.
7741 if self.delay_iallocator:
7742 self._CheckPrereq2()
7745 feedback_fn("No disks need replacement")
7748 feedback_fn("Replacing disk(s) %s for %s" %
7749 (utils.CommaJoin(self.disks), self.instance.name))
7751 activate_disks = (not self.instance.admin_up)
7753 # Activate the instance disks if we're replacing them on a down instance
7755 _StartInstanceDisks(self.lu, self.instance, True)
7758 # Should we replace the secondary node?
7759 if self.new_node is not None:
7760 fn = self._ExecDrbd8Secondary
7762 fn = self._ExecDrbd8DiskOnly
7764 return fn(feedback_fn)
7767 # Deactivate the instance disks if we're replacing them on a
7770 _SafeShutdownInstanceDisks(self.lu, self.instance)
7772 def _CheckVolumeGroup(self, nodes):
7773 self.lu.LogInfo("Checking volume groups")
7775 vgname = self.cfg.GetVGName()
7777 # Make sure volume group exists on all involved nodes
7778 results = self.rpc.call_vg_list(nodes)
7780 raise errors.OpExecError("Can't list volume groups on the nodes")
7784 res.Raise("Error checking node %s" % node)
7785 if vgname not in res.payload:
7786 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7789 def _CheckDisksExistence(self, nodes):
7790 # Check disk existence
7791 for idx, dev in enumerate(self.instance.disks):
7792 if idx not in self.disks:
7796 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7797 self.cfg.SetDiskID(dev, node)
7799 result = self.rpc.call_blockdev_find(node, dev)
7801 msg = result.fail_msg
7802 if msg or not result.payload:
7804 msg = "disk not found"
7805 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7808 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7809 for idx, dev in enumerate(self.instance.disks):
7810 if idx not in self.disks:
7813 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7816 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7818 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7819 " replace disks for instance %s" %
7820 (node_name, self.instance.name))
7822 def _CreateNewStorage(self, node_name):
7823 vgname = self.cfg.GetVGName()
7826 for idx, dev in enumerate(self.instance.disks):
7827 if idx not in self.disks:
7830 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7832 self.cfg.SetDiskID(dev, node_name)
7834 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7835 names = _GenerateUniqueNames(self.lu, lv_names)
7837 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7838 logical_id=(vgname, names[0]))
7839 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7840 logical_id=(vgname, names[1]))
7842 new_lvs = [lv_data, lv_meta]
7843 old_lvs = dev.children
7844 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7846 # we pass force_create=True to force the LVM creation
7847 for new_lv in new_lvs:
7848 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7849 _GetInstanceInfoText(self.instance), False)
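# At this point iv_names maps each DRBD device to its old and new LV
# pairs, e.g. (illustrative):
#   {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv], [lv_data, lv_meta])}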
7853 def _CheckDevices(self, node_name, iv_names):
7854 for name, (dev, _, _) in iv_names.iteritems():
7855 self.cfg.SetDiskID(dev, node_name)
7857 result = self.rpc.call_blockdev_find(node_name, dev)
7859 msg = result.fail_msg
7860 if msg or not result.payload:
7862 msg = "disk not found"
7863 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7866 if result.payload.is_degraded:
7867 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7869 def _RemoveOldStorage(self, node_name, iv_names):
7870 for name, (_, old_lvs, _) in iv_names.iteritems():
7871 self.lu.LogInfo("Remove logical volumes for %s" % name)
7874 self.cfg.SetDiskID(lv, node_name)
7876 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7878 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7879 hint="remove unused LVs manually")
7881 def _ReleaseNodeLock(self, node_name):
7882 """Releases the lock for a given node."""
7883 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7885 def _ExecDrbd8DiskOnly(self, feedback_fn):
7886 """Replace a disk on the primary or secondary for DRBD 8.
7888 The algorithm for replace is quite complicated:
7890 1. for each disk to be replaced:
7892 1. create new LVs on the target node with unique names
7893 1. detach old LVs from the drbd device
7894 1. rename old LVs to name_replaced.<time_t>
7895 1. rename new LVs to old LVs
7896 1. attach the new LVs (with the old names now) to the drbd device
7898 1. wait for sync across all devices
7900 1. for each modified disk:
7902 1. remove old LVs (which have the name name_replaced.<time_t>)
7904 Failures are not very well handled.
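As an illustration of the naming above: a data LV whose name ends in
.disk0_data is renamed by appending _replaced-<time_t>, while the newly
created LV takes over the original name.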
7909 # Step: check device activation
7910 self.lu.LogStep(1, steps_total, "Check device existence")
7911 self._CheckDisksExistence([self.other_node, self.target_node])
7912 self._CheckVolumeGroup([self.target_node, self.other_node])
7914 # Step: check other node consistency
7915 self.lu.LogStep(2, steps_total, "Check peer consistency")
7916 self._CheckDisksConsistency(self.other_node,
7917 self.other_node == self.instance.primary_node,
7920 # Step: create new storage
7921 self.lu.LogStep(3, steps_total, "Allocate new storage")
7922 iv_names = self._CreateNewStorage(self.target_node)
7924 # Step: for each lv, detach+rename*2+attach
7925 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7926 for dev, old_lvs, new_lvs in iv_names.itervalues():
7927 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7929 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7931 result.Raise("Can't detach drbd from local storage on node"
7932 " %s for device %s" % (self.target_node, dev.iv_name))
7934 #cfg.Update(instance)
7936 # ok, we created the new LVs, so now we know we have the needed
7937 # storage; as such, we proceed on the target node to rename
7938 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7939 # using the assumption that logical_id == physical_id (which in
7940 # turn is the unique_id on that node)
7942 # FIXME(iustin): use a better name for the replaced LVs
7943 temp_suffix = int(time.time())
7944 ren_fn = lambda d, suff: (d.physical_id[0],
7945 d.physical_id[1] + "_replaced-%s" % suff)
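# Illustrative rename performed by ren_fn (VG/LV names hypothetical):
#   ("xenvg", "aabbccdd.disk0_data")
#     -> ("xenvg", "aabbccdd.disk0_data_replaced-1300000000")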
7947 # Build the rename list based on what LVs exist on the node
7948 rename_old_to_new = []
7949 for to_ren in old_lvs:
7950 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7951 if not result.fail_msg and result.payload:
7953 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7955 self.lu.LogInfo("Renaming the old LVs on the target node")
7956 result = self.rpc.call_blockdev_rename(self.target_node,
7958 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7960 # Now we rename the new LVs to the old LVs
7961 self.lu.LogInfo("Renaming the new LVs on the target node")
7962 rename_new_to_old = [(new, old.physical_id)
7963 for old, new in zip(old_lvs, new_lvs)]
7964 result = self.rpc.call_blockdev_rename(self.target_node,
7966 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7968 for old, new in zip(old_lvs, new_lvs):
7969 new.logical_id = old.logical_id
7970 self.cfg.SetDiskID(new, self.target_node)
7972 for disk in old_lvs:
7973 disk.logical_id = ren_fn(disk, temp_suffix)
7974 self.cfg.SetDiskID(disk, self.target_node)
7976 # Now that the new lvs have the old name, we can add them to the device
7977 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7978 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7980 msg = result.fail_msg
7982 for new_lv in new_lvs:
7983 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7986 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7987 hint=("cleanup manually the unused logical"
7989 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7991 dev.children = new_lvs
7993 self.cfg.Update(self.instance, feedback_fn)
7996 if self.early_release:
7997 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7999 self._RemoveOldStorage(self.target_node, iv_names)
8000 # WARNING: we release both node locks here, do not do other RPCs
8001 # than WaitForSync to the primary node
8002 self._ReleaseNodeLock([self.target_node, self.other_node])
8005 # This can fail as the old devices are degraded and _WaitForSync
8006 # does a combined result over all disks, so we don't check its return value
8007 self.lu.LogStep(cstep, steps_total, "Sync devices")
8009 _WaitForSync(self.lu, self.instance)
8011 # Check all devices manually
8012 self._CheckDevices(self.instance.primary_node, iv_names)
8014 # Step: remove old storage
8015 if not self.early_release:
8016 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8018 self._RemoveOldStorage(self.target_node, iv_names)
8020 def _ExecDrbd8Secondary(self, feedback_fn):
8021 """Replace the secondary node for DRBD 8.
8023 The algorithm for replace is quite complicated:
8024 - for all disks of the instance:
8025 - create new LVs on the new node with same names
8026 - shutdown the drbd device on the old secondary
8027 - disconnect the drbd network on the primary
8028 - create the drbd device on the new secondary
8029 - network attach the drbd on the primary, using an artifice:
8030 the drbd code for Attach() will connect to the network if it
8031 finds a device which is connected to the good local disks but not network enabled
8033 - wait for sync across all devices
8034 - remove all disks from the old secondary
8036 Failures are not very well handled.
8041 # Step: check device activation
8042 self.lu.LogStep(1, steps_total, "Check device existence")
8043 self._CheckDisksExistence([self.instance.primary_node])
8044 self._CheckVolumeGroup([self.instance.primary_node])
8046 # Step: check other node consistency
8047 self.lu.LogStep(2, steps_total, "Check peer consistency")
8048 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8050 # Step: create new storage
8051 self.lu.LogStep(3, steps_total, "Allocate new storage")
8052 for idx, dev in enumerate(self.instance.disks):
8053 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8054 (self.new_node, idx))
8055 # we pass force_create=True to force LVM creation
8056 for new_lv in dev.children:
8057 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8058 _GetInstanceInfoText(self.instance), False)
8060 # Step 4: drbd minors and drbd setup changes
8061 # after this, we must manually remove the drbd minors on both the
8062 # error and the success paths
8063 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8064 minors = self.cfg.AllocateDRBDMinor([self.new_node
8065 for dev in self.instance.disks],
8067 logging.debug("Allocated minors %r", minors)
8070 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8071 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8072 (self.new_node, idx))
8073 # create new devices on new_node; note that we create two IDs:
8074 # one without port, so the drbd will be activated without
8075 # networking information on the new node at this stage, and one
8076 # with network, for the later activation in step 4
8077 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8078 if self.instance.primary_node == o_node1:
8081 assert self.instance.primary_node == o_node2, "Three-node instance?"
8084 new_alone_id = (self.instance.primary_node, self.new_node, None,
8085 p_minor, new_minor, o_secret)
8086 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8087 p_minor, new_minor, o_secret)
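# Illustrative logical_id rewrite (values hypothetical), with node1 as
# primary, node3 as the new secondary and new_minor == 2:
#   old:       ("node1", "node2", 11000, 0, 1, "secret")
#   new_alone: ("node1", "node3", None,  0, 2, "secret")
#   new_net:   ("node1", "node3", 11000, 0, 2, "secret")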
8089 iv_names[idx] = (dev, dev.children, new_net_id)
8090 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8092 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8093 logical_id=new_alone_id,
8094 children=dev.children,
8097 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8098 _GetInstanceInfoText(self.instance), False)
8099 except errors.GenericError:
8100 self.cfg.ReleaseDRBDMinors(self.instance.name)
8103 # We have new devices, shutdown the drbd on the old secondary
8104 for idx, dev in enumerate(self.instance.disks):
8105 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8106 self.cfg.SetDiskID(dev, self.target_node)
8107 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8109 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8110 " node: %s" % (idx, msg),
8111 hint=("Please cleanup this device manually as"
8112 " soon as possible"))
8114 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8115 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8116 self.node_secondary_ip,
8117 self.instance.disks)\
8118 [self.instance.primary_node]
8120 msg = result.fail_msg
8122 # detaches didn't succeed (unlikely)
8123 self.cfg.ReleaseDRBDMinors(self.instance.name)
8124 raise errors.OpExecError("Can't detach the disks from the network on"
8125 " old node: %s" % (msg,))
8127 # if we managed to detach at least one, we update all the disks of
8128 # the instance to point to the new secondary
8129 self.lu.LogInfo("Updating instance configuration")
8130 for dev, _, new_logical_id in iv_names.itervalues():
8131 dev.logical_id = new_logical_id
8132 self.cfg.SetDiskID(dev, self.instance.primary_node)
8134 self.cfg.Update(self.instance, feedback_fn)
8136 # and now perform the drbd attach
8137 self.lu.LogInfo("Attaching primary drbds to new secondary"
8138 " (standalone => connected)")
8139 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8141 self.node_secondary_ip,
8142 self.instance.disks,
8145 for to_node, to_result in result.items():
8146 msg = to_result.fail_msg
8148 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8150 hint=("please do a gnt-instance info to see the"
8151 " status of disks"))
8153 if self.early_release:
8154 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8156 self._RemoveOldStorage(self.target_node, iv_names)
8157 # WARNING: we release all node locks here, do not do other RPCs
8158 # than WaitForSync to the primary node
8159 self._ReleaseNodeLock([self.instance.primary_node,
8164 # This can fail as the old devices are degraded and _WaitForSync
8165 # does a combined result over all disks, so we don't check its return value
8166 self.lu.LogStep(cstep, steps_total, "Sync devices")
8168 _WaitForSync(self.lu, self.instance)
8170 # Check all devices manually
8171 self._CheckDevices(self.instance.primary_node, iv_names)
8173 # Step: remove old storage
8174 if not self.early_release:
8175 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8176 self._RemoveOldStorage(self.target_node, iv_names)
8179 class LURepairNodeStorage(NoHooksLU):
8180 """Repairs the volume group on a node.
8185 ("storage_type", _NoDefault, _CheckStorageType),
8186 ("name", _NoDefault, _TNonEmptyString),
8187 ("ignore_consistency", False, _TBool),
8191 def CheckArguments(self):
8192 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8194 storage_type = self.op.storage_type
8196 if (constants.SO_FIX_CONSISTENCY not in
8197 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8198 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
8199 " repaired" % storage_type,
8202 def ExpandNames(self):
8203 self.needed_locks = {
8204 locking.LEVEL_NODE: [self.op.node_name],
8207 def _CheckFaultyDisks(self, instance, node_name):
8208 """Ensure faulty disks abort the opcode or at least warn."""
8210 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8212 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8213 " node '%s'" % (instance.name, node_name),
8215 except errors.OpPrereqError, err:
8216 if self.op.ignore_consistency:
8217 self.proc.LogWarning(str(err.args[0]))
8221 def CheckPrereq(self):
8222 """Check prerequisites.
8225 # Check whether any instance on this node has faulty disks
8226 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8227 if not inst.admin_up:
8229 check_nodes = set(inst.all_nodes)
8230 check_nodes.discard(self.op.node_name)
8231 for inst_node_name in check_nodes:
8232 self._CheckFaultyDisks(inst, inst_node_name)
8234 def Exec(self, feedback_fn):
8235 feedback_fn("Repairing storage unit '%s' on %s ..." %
8236 (self.op.name, self.op.node_name))
8238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8239 result = self.rpc.call_storage_execute(self.op.node_name,
8240 self.op.storage_type, st_args,
8242 constants.SO_FIX_CONSISTENCY)
8243 result.Raise("Failed to repair storage unit '%s' on %s" %
8244 (self.op.name, self.op.node_name))
8247 class LUNodeEvacuationStrategy(NoHooksLU):
8248 """Computes the node evacuation strategy.
8252 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8253 ("remote_node", None, _TMaybeString),
8254 ("iallocator", None, _TMaybeString),
8258 def CheckArguments(self):
8259 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8261 def ExpandNames(self):
8262 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8263 self.needed_locks = locks = {}
8264 if self.op.remote_node is None:
8265 locks[locking.LEVEL_NODE] = locking.ALL_SET
8267 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8268 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8270 def Exec(self, feedback_fn):
8271 if self.op.remote_node is not None:
8273 for node in self.op.nodes:
8274 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8277 if i.primary_node == self.op.remote_node:
8278 raise errors.OpPrereqError("Node %s is the primary node of"
8279 " instance %s, cannot use it as"
8281 (self.op.remote_node, i.name),
8283 result.append([i.name, self.op.remote_node])
8285 ial = IAllocator(self.cfg, self.rpc,
8286 mode=constants.IALLOCATOR_MODE_MEVAC,
8287 evac_nodes=self.op.nodes)
8288 ial.Run(self.op.iallocator, validate=True)
8290 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8296 class LUGrowDisk(LogicalUnit):
8297 """Grow a disk of an instance.
8301 HTYPE = constants.HTYPE_INSTANCE
8304 ("disk", _NoDefault, _TInt),
8305 ("amount", _NoDefault, _TInt),
8306 ("wait_for_sync", True, _TBool),
8310 def ExpandNames(self):
8311 self._ExpandAndLockInstance()
8312 self.needed_locks[locking.LEVEL_NODE] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes()
8319 def BuildHooksEnv(self):
8322 This runs on the master, the primary and all the secondaries.
8326 "DISK": self.op.disk,
8327 "AMOUNT": self.op.amount,
8329 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8330 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8333 def CheckPrereq(self):
8334 """Check prerequisites.
8336 This checks that the instance is in the cluster.
8339 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8340 assert instance is not None, \
8341 "Cannot retrieve locked instance %s" % self.op.instance_name
8342 nodenames = list(instance.all_nodes)
8343 for node in nodenames:
8344 _CheckNodeOnline(self, node)
8346 self.instance = instance
8348 if instance.disk_template not in constants.DTS_GROWABLE:
8349 raise errors.OpPrereqError("Instance's disk layout does not support"
8350 " growing.", errors.ECODE_INVAL)
8352 self.disk = instance.FindDisk(self.op.disk)
8354 if instance.disk_template != constants.DT_FILE:
8355 # TODO: check the free disk space for file, when that feature will be implemented
8357 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8359 def Exec(self, feedback_fn):
8360 """Execute disk grow.
8363 instance = self.instance
8366 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8368 raise errors.OpExecError("Cannot activate block device to grow")
8370 for node in instance.all_nodes:
8371 self.cfg.SetDiskID(disk, node)
8372 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8373 result.Raise("Grow request failed to node %s" % node)
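# Note: the grow RPC is sent to every node holding the disk (primary
# and secondaries) so that mirrored templates such as DRBD are resized
# on both sides of the mirror.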
8375 # TODO: Rewrite code to work properly
8376 # DRBD goes into sync mode for a short amount of time after executing the
8377 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8378 # calling "resize" in sync mode fails. Sleeping for a short amount of
8379 # time is a work-around.
8382 disk.RecordGrow(self.op.amount)
8383 self.cfg.Update(instance, feedback_fn)
8384 if self.op.wait_for_sync:
8385 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8387 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8388 " status.\nPlease check the instance.")
8389 if not instance.admin_up:
8390 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8391 elif not instance.admin_up:
9392 self.proc.LogWarning("Not shutting down the disk even though the instance"
9393 " is not supposed to be running, because wait for"
9394 " sync mode was not requested.")
8397 class LUQueryInstanceData(NoHooksLU):
8398 """Query runtime instance data.
8402 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8403 ("static", False, _TBool),
8407 def ExpandNames(self):
8408 self.needed_locks = {}
8409 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8411 if self.op.instances:
8412 self.wanted_names = []
8413 for name in self.op.instances:
8414 full_name = _ExpandInstanceName(self.cfg, name)
8415 self.wanted_names.append(full_name)
8416 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8418 self.wanted_names = None
8419 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8421 self.needed_locks[locking.LEVEL_NODE] = []
8422 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8424 def DeclareLocks(self, level):
8425 if level == locking.LEVEL_NODE:
8426 self._LockInstancesNodes()
8428 def CheckPrereq(self):
8429 """Check prerequisites.
8431 This only checks the optional instance list against the existing names.
8434 if self.wanted_names is None:
8435 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8437 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8438 in self.wanted_names]
8440 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8441 """Returns the status of a block device
8444 if self.op.static or not node:
8447 self.cfg.SetDiskID(dev, node)
8449 result = self.rpc.call_blockdev_find(node, dev)
8453 result.Raise("Can't compute disk status for %s" % instance_name)
8455 status = result.payload
8459 return (status.dev_path, status.major, status.minor,
8460 status.sync_percent, status.estimated_time,
8461 status.is_degraded, status.ldisk_status)
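# Illustrative return value of _ComputeBlockdevStatus (numbers
# hypothetical): ("/dev/drbd0", 147, 0, 90.5, 120, False, None), i.e.
# device path, major, minor, sync percent, estimated time, degraded
# flag and local-disk status.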
8463 def _ComputeDiskStatus(self, instance, snode, dev):
8464 """Compute block device status.
8467 if dev.dev_type in constants.LDS_DRBD:
8468 # we change the snode then (otherwise we use the one passed in)
8469 if dev.logical_id[0] == instance.primary_node:
8470 snode = dev.logical_id[1]
8472 snode = dev.logical_id[0]
8474 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8476 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8479 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8480 for child in dev.children]
8485 "iv_name": dev.iv_name,
8486 "dev_type": dev.dev_type,
8487 "logical_id": dev.logical_id,
8488 "physical_id": dev.physical_id,
8489 "pstatus": dev_pstatus,
8490 "sstatus": dev_sstatus,
8491 "children": dev_children,
8498 def Exec(self, feedback_fn):
8499 """Gather and return data"""
8502 cluster = self.cfg.GetClusterInfo()
8504 for instance in self.wanted_instances:
8505 if not self.op.static:
8506 remote_info = self.rpc.call_instance_info(instance.primary_node,
8508 instance.hypervisor)
8509 remote_info.Raise("Error checking node %s" % instance.primary_node)
8510 remote_info = remote_info.payload
8511 if remote_info and "state" in remote_info:
8514 remote_state = "down"
8517 if instance.admin_up:
8520 config_state = "down"
8522 disks = [self._ComputeDiskStatus(instance, None, device)
8523 for device in instance.disks]
8526 "name": instance.name,
8527 "config_state": config_state,
8528 "run_state": remote_state,
8529 "pnode": instance.primary_node,
8530 "snodes": instance.secondary_nodes,
8532 # this happens to be the same format used for hooks
8533 "nics": _NICListToTuple(self, instance.nics),
8534 "disk_template": instance.disk_template,
8536 "hypervisor": instance.hypervisor,
8537 "network_port": instance.network_port,
8538 "hv_instance": instance.hvparams,
8539 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8540 "be_instance": instance.beparams,
8541 "be_actual": cluster.FillBE(instance),
8542 "os_instance": instance.osparams,
8543 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8544 "serial_no": instance.serial_no,
8545 "mtime": instance.mtime,
8546 "ctime": instance.ctime,
8547 "uuid": instance.uuid,
8550 result[instance.name] = idict
8555 class LUSetInstanceParams(LogicalUnit):
8556 """Modifies an instance's parameters.
8559 HPATH = "instance-modify"
8560 HTYPE = constants.HTYPE_INSTANCE
8563 ("nics", _EmptyList, _TList),
8564 ("disks", _EmptyList, _TList),
8565 ("beparams", _EmptyDict, _TDict),
8566 ("hvparams", _EmptyDict, _TDict),
8567 ("disk_template", None, _TMaybeString),
8568 ("remote_node", None, _TMaybeString),
8569 ("os_name", None, _TMaybeString),
8570 ("force_variant", False, _TBool),
8571 ("osparams", None, _TOr(_TDict, _TNone)),
8576 def CheckArguments(self):
8577 if not (self.op.nics or self.op.disks or self.op.disk_template or
8578 self.op.hvparams or self.op.beparams or self.op.os_name):
8579 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8581 if self.op.hvparams:
8582 _CheckGlobalHvParams(self.op.hvparams)
8586 for disk_op, disk_dict in self.op.disks:
8587 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8588 if disk_op == constants.DDM_REMOVE:
8591 elif disk_op == constants.DDM_ADD:
8594 if not isinstance(disk_op, int):
8595 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8596 if not isinstance(disk_dict, dict):
8597 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8598 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8600 if disk_op == constants.DDM_ADD:
8601 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8602 if mode not in constants.DISK_ACCESS_SET:
8603 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8605 size = disk_dict.get('size', None)
8607 raise errors.OpPrereqError("Required disk parameter size missing",
8611 except (TypeError, ValueError), err:
8612 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8613 str(err), errors.ECODE_INVAL)
8614 disk_dict['size'] = size
8616 # modification of disk
8617 if 'size' in disk_dict:
8618 raise errors.OpPrereqError("Disk size change not possible, use"
8619 " grow-disk", errors.ECODE_INVAL)
8621 if disk_addremove > 1:
8622 raise errors.OpPrereqError("Only one disk add or remove operation"
8623 " supported at a time", errors.ECODE_INVAL)
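# Illustrative op.disks values (sizes hypothetical):
#   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
#   [(constants.DDM_REMOVE, {})]
#   [(0, {"mode": "ro"})]    # modify the mode of disk 0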
8625 if self.op.disks and self.op.disk_template is not None:
8626 raise errors.OpPrereqError("Disk template conversion and other disk"
8627 " changes not supported at the same time",
8630 if self.op.disk_template:
8631 _CheckDiskTemplate(self.op.disk_template)
8632 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8633 self.op.remote_node is None):
8634 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8635 " one requires specifying a secondary node",
8640 for nic_op, nic_dict in self.op.nics:
8641 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8642 if nic_op == constants.DDM_REMOVE:
8645 elif nic_op == constants.DDM_ADD:
8648 if not isinstance(nic_op, int):
8649 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8650 if not isinstance(nic_dict, dict):
8651 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8652 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8654 # nic_dict should be a dict
8655 nic_ip = nic_dict.get('ip', None)
8656 if nic_ip is not None:
8657 if nic_ip.lower() == constants.VALUE_NONE:
8658 nic_dict['ip'] = None
8660 if not netutils.IsValidIP4(nic_ip):
8661 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8664 nic_bridge = nic_dict.get('bridge', None)
8665 nic_link = nic_dict.get('link', None)
8666 if nic_bridge and nic_link:
8667 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8668 " at the same time", errors.ECODE_INVAL)
8669 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8670 nic_dict['bridge'] = None
8671 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8672 nic_dict['link'] = None
8674 if nic_op == constants.DDM_ADD:
8675 nic_mac = nic_dict.get('mac', None)
8677 nic_dict['mac'] = constants.VALUE_AUTO
8679 if 'mac' in nic_dict:
8680 nic_mac = nic_dict['mac']
8681 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8682 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8684 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8685 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8686 " modifying an existing nic",
8689 if nic_addremove > 1:
8690 raise errors.OpPrereqError("Only one NIC add or remove operation"
8691 " supported at a time", errors.ECODE_INVAL)
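# Illustrative op.nics values (addresses hypothetical):
#   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "ip": "192.0.2.10"})]
#   [(0, {"link": "br0"})]   # modify NIC 0
#   [(constants.DDM_REMOVE, {})]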
8693 def ExpandNames(self):
8694 self._ExpandAndLockInstance()
8695 self.needed_locks[locking.LEVEL_NODE] = []
8696 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8698 def DeclareLocks(self, level):
8699 if level == locking.LEVEL_NODE:
8700 self._LockInstancesNodes()
8701 if self.op.disk_template and self.op.remote_node:
8702 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8703 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8705 def BuildHooksEnv(self):
8708 This runs on the master, primary and secondaries.
8712 if constants.BE_MEMORY in self.be_new:
8713 args['memory'] = self.be_new[constants.BE_MEMORY]
8714 if constants.BE_VCPUS in self.be_new:
8715 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8716 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8717 # information at all.
8720 nic_override = dict(self.op.nics)
8721 for idx, nic in enumerate(self.instance.nics):
8722 if idx in nic_override:
8723 this_nic_override = nic_override[idx]
8725 this_nic_override = {}
8726 if 'ip' in this_nic_override:
8727 ip = this_nic_override['ip']
8730 if 'mac' in this_nic_override:
8731 mac = this_nic_override['mac']
8734 if idx in self.nic_pnew:
8735 nicparams = self.nic_pnew[idx]
8737 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8738 mode = nicparams[constants.NIC_MODE]
8739 link = nicparams[constants.NIC_LINK]
8740 args['nics'].append((ip, mac, mode, link))
8741 if constants.DDM_ADD in nic_override:
8742 ip = nic_override[constants.DDM_ADD].get('ip', None)
8743 mac = nic_override[constants.DDM_ADD]['mac']
8744 nicparams = self.nic_pnew[constants.DDM_ADD]
8745 mode = nicparams[constants.NIC_MODE]
8746 link = nicparams[constants.NIC_LINK]
8747 args['nics'].append((ip, mac, mode, link))
8748 elif constants.DDM_REMOVE in nic_override:
8749 del args['nics'][-1]
8751 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8752 if self.op.disk_template:
8753 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8754 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8757 def CheckPrereq(self):
8758 """Check prerequisites.
8760 This only checks the instance list against the existing names.
8763 # checking the new params on the primary/secondary nodes
8765 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8766 cluster = self.cluster = self.cfg.GetClusterInfo()
8767 assert self.instance is not None, \
8768 "Cannot retrieve locked instance %s" % self.op.instance_name
8769 pnode = instance.primary_node
8770 nodelist = list(instance.all_nodes)
8773 if self.op.os_name and not self.op.force:
8774 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8775 self.op.force_variant)
8776 instance_os = self.op.os_name
8778 instance_os = instance.os
8780 if self.op.disk_template:
8781 if instance.disk_template == self.op.disk_template:
8782 raise errors.OpPrereqError("Instance already has disk template %s" %
8783 instance.disk_template, errors.ECODE_INVAL)
8785 if (instance.disk_template,
8786 self.op.disk_template) not in self._DISK_CONVERSIONS:
8787 raise errors.OpPrereqError("Unsupported disk template conversion from"
8788 " %s to %s" % (instance.disk_template,
8789 self.op.disk_template),
8791 _CheckInstanceDown(self, instance, "cannot change disk template")
8792 if self.op.disk_template in constants.DTS_NET_MIRROR:
8793 if self.op.remote_node == pnode:
8794 raise errors.OpPrereqError("Given new secondary node %s is the same"
8795 " as the primary node of the instance" %
8796 self.op.remote_node, errors.ECODE_STATE)
8797 _CheckNodeOnline(self, self.op.remote_node)
8798 _CheckNodeNotDrained(self, self.op.remote_node)
8799 disks = [{"size": d.size} for d in instance.disks]
8800 required = _ComputeDiskSize(self.op.disk_template, disks)
8801 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8803 # hvparams processing
8804 if self.op.hvparams:
8805 hv_type = instance.hypervisor
8806 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8807 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8808 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8811 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8812 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8813 self.hv_new = hv_new # the new actual values
8814 self.hv_inst = i_hvdict # the new dict (without defaults)
8816 self.hv_new = self.hv_inst = {}
8818 # beparams processing
8819 if self.op.beparams:
8820 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8822 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8823 be_new = cluster.SimpleFillBE(i_bedict)
8824 self.be_new = be_new # the new actual values
8825 self.be_inst = i_bedict # the new dict (without defaults)
8827 self.be_new = self.be_inst = {}
8829 # osparams processing
8830 if self.op.osparams:
8831 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8832 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8833 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8834 self.os_inst = i_osdict # the new dict (without defaults)
8836 self.os_new = self.os_inst = {}
8840 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8841 mem_check_list = [pnode]
8842 if be_new[constants.BE_AUTO_BALANCE]:
8843 # either we changed auto_balance to yes or it was from before
8844 mem_check_list.extend(instance.secondary_nodes)
8845 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8846 instance.hypervisor)
8847 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8848 instance.hypervisor)
8849 pninfo = nodeinfo[pnode]
8850 msg = pninfo.fail_msg
8852 # Assume the primary node is unreachable and go ahead
8853 self.warn.append("Can't get info from primary node %s: %s" %
8855 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8856 self.warn.append("Node data from primary node %s doesn't contain"
8857 " free memory information" % pnode)
8858 elif instance_info.fail_msg:
8859 self.warn.append("Can't get instance runtime information: %s" %
8860 instance_info.fail_msg)
8862 if instance_info.payload:
8863 current_mem = int(instance_info.payload['memory'])
8865 # Assume instance not running
8866 # (there is a slight race condition here, but it's not very probable,
8867 # and we have no other way to check)
8869 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8870 pninfo.payload['memory_free'])
8872 raise errors.OpPrereqError("This change will prevent the instance"
8873 " from starting, due to %d MB of memory"
8874 " missing on its primary node" % miss_mem,
8877 if be_new[constants.BE_AUTO_BALANCE]:
8878 for node, nres in nodeinfo.items():
8879 if node not in instance.secondary_nodes:
8883 self.warn.append("Can't get info from secondary node %s: %s" %
8885 elif not isinstance(nres.payload.get('memory_free', None), int):
8886 self.warn.append("Secondary node %s didn't return free"
8887 " memory information" % node)
8888 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8889 self.warn.append("Not enough memory to failover instance to"
8890 " secondary node %s" % node)
8895 for nic_op, nic_dict in self.op.nics:
8896 if nic_op == constants.DDM_REMOVE:
8897 if not instance.nics:
8898 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8901 if nic_op != constants.DDM_ADD:
8903 if not instance.nics:
8904 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8905 " no NICs" % nic_op,
8907 if nic_op < 0 or nic_op >= len(instance.nics):
8908 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8910 (nic_op, len(instance.nics) - 1),
8912 old_nic_params = instance.nics[nic_op].nicparams
8913 old_nic_ip = instance.nics[nic_op].ip
8918 update_params_dict = dict([(key, nic_dict[key])
8919 for key in constants.NICS_PARAMETERS
8920 if key in nic_dict])
8922 if 'bridge' in nic_dict:
8923 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8925 new_nic_params = _GetUpdatedParams(old_nic_params,
8927 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8928 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8929 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8930 self.nic_pinst[nic_op] = new_nic_params
8931 self.nic_pnew[nic_op] = new_filled_nic_params
8932 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8934 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8935 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8936 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8938 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8940 self.warn.append(msg)
8942 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8943 if new_nic_mode == constants.NIC_MODE_ROUTED:
8944 if 'ip' in nic_dict:
8945 nic_ip = nic_dict['ip']
8949 raise errors.OpPrereqError('Cannot set the nic ip to None'
8950 ' on a routed nic', errors.ECODE_INVAL)
8951 if 'mac' in nic_dict:
8952 nic_mac = nic_dict['mac']
8954 raise errors.OpPrereqError('Cannot set the nic mac to None',
8956 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8957 # otherwise generate the mac
8958 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8960 # or validate/reserve the current one
8962 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8963 except errors.ReservationError:
8964 raise errors.OpPrereqError("MAC address %s already in use"
8965 " in cluster" % nic_mac,
8966 errors.ECODE_NOTUNIQUE)
8969 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8970 raise errors.OpPrereqError("Disk operations not supported for"
8971 " diskless instances",
8973 for disk_op, _ in self.op.disks:
8974 if disk_op == constants.DDM_REMOVE:
8975 if len(instance.disks) == 1:
8976 raise errors.OpPrereqError("Cannot remove the last disk of"
8977 " an instance", errors.ECODE_INVAL)
8978 _CheckInstanceDown(self, instance, "cannot remove disks")
8980 if (disk_op == constants.DDM_ADD and
8981 len(instance.disks) >= constants.MAX_DISKS):
8982 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8983 " add more" % constants.MAX_DISKS,
8985 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8987 if disk_op < 0 or disk_op >= len(instance.disks):
8988 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8990 (disk_op, len(instance.disks)),
8995 def _ConvertPlainToDrbd(self, feedback_fn):
8996 """Converts an instance from plain to drbd.
8999 feedback_fn("Converting template to drbd")
9000 instance = self.instance
9001 pnode = instance.primary_node
9002 snode = self.op.remote_node
9004 # create a fake disk info for _GenerateDiskTemplate
9005 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9006 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9007 instance.name, pnode, [snode],
9008 disk_info, None, None, 0)
9009 info = _GetInstanceInfoText(instance)
9010 feedback_fn("Creating additional volumes...")
9011 # first, create the missing data and meta devices
9012 for disk in new_disks:
9013 # unfortunately this is... not too nice
9014 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9016 for child in disk.children:
9017 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9018 # at this stage, all new LVs have been created, we can rename the old ones
9020 feedback_fn("Renaming original volumes...")
9021 rename_list = [(o, n.children[0].logical_id)
9022 for (o, n) in zip(instance.disks, new_disks)]
9023 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9024 result.Raise("Failed to rename original LVs")
9026 feedback_fn("Initializing DRBD devices...")
9027 # all child devices are in place, we can now create the DRBD devices
9028 for disk in new_disks:
9029 for node in [pnode, snode]:
9030 f_create = node == pnode
9031 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9033 # at this point, the instance has been modified
9034 instance.disk_template = constants.DT_DRBD8
9035 instance.disks = new_disks
9036 self.cfg.Update(instance, feedback_fn)
9038 # disks are created, waiting for sync
9039 disk_abort = not _WaitForSync(self, instance)
9041 raise errors.OpExecError("There are some degraded disks for"
9042 " this instance, please cleanup manually")
9044 def _ConvertDrbdToPlain(self, feedback_fn):
9045 """Converts an instance from drbd to plain.
9048 instance = self.instance
9049 assert len(instance.secondary_nodes) == 1
9050 pnode = instance.primary_node
9051 snode = instance.secondary_nodes[0]
9052 feedback_fn("Converting template to plain")
9054 old_disks = instance.disks
9055 new_disks = [d.children[0] for d in old_disks]
9057 # copy over size and mode
9058 for parent, child in zip(old_disks, new_disks):
9059 child.size = parent.size
9060 child.mode = parent.mode
9062 # update instance structure
9063 instance.disks = new_disks
9064 instance.disk_template = constants.DT_PLAIN
9065 self.cfg.Update(instance, feedback_fn)
9067 feedback_fn("Removing volumes on the secondary node...")
9068 for disk in old_disks:
9069 self.cfg.SetDiskID(disk, snode)
9070 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9072 self.LogWarning("Could not remove block device %s on node %s,"
9073 " continuing anyway: %s", disk.iv_name, snode, msg)
9075 feedback_fn("Removing unneeded volumes on the primary node...")
9076 for idx, disk in enumerate(old_disks):
9077 meta = disk.children[1]
9078 self.cfg.SetDiskID(meta, pnode)
9079 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9081 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9082 " continuing anyway: %s", idx, pnode, msg)
9085 def Exec(self, feedback_fn):
9086 """Modifies an instance.
9088 All parameters take effect only at the next restart of the instance.
9091 # Process here the warnings from CheckPrereq, as we don't have a
9092 # feedback_fn there.
9093 for warn in self.warn:
9094 feedback_fn("WARNING: %s" % warn)
9097 instance = self.instance
9099 for disk_op, disk_dict in self.op.disks:
9100 if disk_op == constants.DDM_REMOVE:
9101 # remove the last disk
9102 device = instance.disks.pop()
9103 device_idx = len(instance.disks)
9104 for node, disk in device.ComputeNodeTree(instance.primary_node):
9105 self.cfg.SetDiskID(disk, node)
9106 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9108 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9109 " continuing anyway", device_idx, node, msg)
9110 result.append(("disk/%d" % device_idx, "remove"))
9111 elif disk_op == constants.DDM_ADD:
9113 if instance.disk_template == constants.DT_FILE:
9114 file_driver, file_path = instance.disks[0].logical_id
9115 file_path = os.path.dirname(file_path)
9117 file_driver = file_path = None
9118 disk_idx_base = len(instance.disks)
9119 new_disk = _GenerateDiskTemplate(self,
9120 instance.disk_template,
9121 instance.name, instance.primary_node,
9122 instance.secondary_nodes,
9127 instance.disks.append(new_disk)
9128 info = _GetInstanceInfoText(instance)
9130 logging.info("Creating volume %s for instance %s",
9131 new_disk.iv_name, instance.name)
9132 # Note: this needs to be kept in sync with _CreateDisks
9134 for node in instance.all_nodes:
9135 f_create = node == instance.primary_node
9137 _CreateBlockDev(self, node, instance, new_disk,
9138 f_create, info, f_create)
9139 except errors.OpExecError, err:
9140 self.LogWarning("Failed to create volume %s (%s) on"
9142 new_disk.iv_name, new_disk, node, err)
9143 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9144 (new_disk.size, new_disk.mode)))
9146 # change a given disk
9147 instance.disks[disk_op].mode = disk_dict['mode']
9148 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9150 if self.op.disk_template:
9151 r_shut = _ShutdownInstanceDisks(self, instance)
9153 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9154 " proceed with disk template conversion")
9155 mode = (instance.disk_template, self.op.disk_template)
9157 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9159 self.cfg.ReleaseDRBDMinors(instance.name)
9161 result.append(("disk_template", self.op.disk_template))
9164 for nic_op, nic_dict in self.op.nics:
9165 if nic_op == constants.DDM_REMOVE:
9166 # remove the last nic
9167 del instance.nics[-1]
9168 result.append(("nic.%d" % len(instance.nics), "remove"))
9169 elif nic_op == constants.DDM_ADD:
9170 # mac and bridge should be set by now
9171 mac = nic_dict['mac']
9172 ip = nic_dict.get('ip', None)
9173 nicparams = self.nic_pinst[constants.DDM_ADD]
9174 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9175 instance.nics.append(new_nic)
9176 result.append(("nic.%d" % (len(instance.nics) - 1),
9177 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9178 (new_nic.mac, new_nic.ip,
9179 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9180 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9183 for key in 'mac', 'ip':
9185 setattr(instance.nics[nic_op], key, nic_dict[key])
9186 if nic_op in self.nic_pinst:
9187 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9188 for key, val in nic_dict.iteritems():
9189 result.append(("nic.%s/%d" % (key, nic_op), val))
9192 if self.op.hvparams:
9193 instance.hvparams = self.hv_inst
9194 for key, val in self.op.hvparams.iteritems():
9195 result.append(("hv/%s" % key, val))
9198 if self.op.beparams:
9199 instance.beparams = self.be_inst
9200 for key, val in self.op.beparams.iteritems():
9201 result.append(("be/%s" % key, val))
9205 instance.os = self.op.os_name
9208 if self.op.osparams:
9209 instance.osparams = self.os_inst
9210 for key, val in self.op.osparams.iteritems():
9211 result.append(("os/%s" % key, val))
9213 self.cfg.Update(instance, feedback_fn)
9217 _DISK_CONVERSIONS = {
9218 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9219 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9223 class LUQueryExports(NoHooksLU):
9224 """Query the exports list
9228 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9229 ("use_locking", False, _TBool),
9233 def ExpandNames(self):
9234 self.needed_locks = {}
9235 self.share_locks[locking.LEVEL_NODE] = 1
9236 if not self.op.nodes:
9237 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9239 self.needed_locks[locking.LEVEL_NODE] = \
9240 _GetWantedNodes(self, self.op.nodes)
9242 def Exec(self, feedback_fn):
9243 """Compute the list of all the exported system images.
9246 @return: a dictionary with the structure node->(export-list)
9247 where export-list is a list of the instances exported on that node.
9251 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9252 rpcresult = self.rpc.call_export_list(self.nodes)
9254 for node in rpcresult:
9255 if rpcresult[node].fail_msg:
9256 result[node] = False
9258 result[node] = rpcresult[node].payload
9263 class LUPrepareExport(NoHooksLU):
9264 """Prepares an instance for an export and returns useful information.
9269 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9273 def ExpandNames(self):
9274 self._ExpandAndLockInstance()
9276 def CheckPrereq(self):
9277 """Check prerequisites.
9280 instance_name = self.op.instance_name
9282 self.instance = self.cfg.GetInstanceInfo(instance_name)
9283 assert self.instance is not None, \
9284 "Cannot retrieve locked instance %s" % self.op.instance_name
9285 _CheckNodeOnline(self, self.instance.primary_node)
9287 self._cds = _GetClusterDomainSecret()
9289 def Exec(self, feedback_fn):
9290 """Prepares an instance for an export.
9293 instance = self.instance
9295 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9296 salt = utils.GenerateSecret(8)
9298 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9299 result = self.rpc.call_x509_cert_create(instance.primary_node,
9300 constants.RIE_CERT_VALIDITY)
9301 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9303 (name, cert_pem) = result.payload
9305 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9309 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9310 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9312 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9318 class LUExportInstance(LogicalUnit):
9319 """Export an instance to an image in the cluster.
9322 HPATH = "instance-export"
9323 HTYPE = constants.HTYPE_INSTANCE
9326 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9327 ("shutdown", True, _TBool),
9329 ("remove_instance", False, _TBool),
9330 ("ignore_remove_failures", False, _TBool),
9331 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9332 ("x509_key_name", None, _TOr(_TList, _TNone)),
9333 ("destination_x509_ca", None, _TMaybeString),
9337 def CheckArguments(self):
9338 """Check the arguments.
9341 self.x509_key_name = self.op.x509_key_name
9342 self.dest_x509_ca_pem = self.op.destination_x509_ca
9344 if self.op.remove_instance and not self.op.shutdown:
9345 raise errors.OpPrereqError("Cannot remove instance without shutting it"
9348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9349 if not self.x509_key_name:
9350 raise errors.OpPrereqError("Missing X509 key name for encryption",
9353 if not self.dest_x509_ca_pem:
9354 raise errors.OpPrereqError("Missing destination X509 CA",
9357 def ExpandNames(self):
9358 self._ExpandAndLockInstance()
9360 # Lock all nodes for local exports
9361 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9362 # FIXME: lock only instance primary and destination node
9364 # Sad but true, for now we have to lock all nodes, as we don't know where
9365 # the previous export might be, and in this LU we search for it and
9366 # remove it from its current node. In the future we could fix this by:
9367 # - making a tasklet to search (share-lock all), then create the
9368 # new one, then one to remove, after
9369 # - removing the removal operation altogether
9370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9372 def DeclareLocks(self, level):
9373 """Last minute lock declaration."""
9374 # All nodes are locked anyway, so nothing to do here.
9376 def BuildHooksEnv(self):
9379 This will run on the master, primary node and target node.
9383 "EXPORT_MODE": self.op.mode,
9384 "EXPORT_NODE": self.op.target_node,
9385 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9386 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9387 # TODO: Generic function for boolean env variables
9388 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9393 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9395 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9396 nl.append(self.op.target_node)
9400 def CheckPrereq(self):
9401 """Check prerequisites.
9403 This checks that the instance and node names are valid.
9406 instance_name = self.op.instance_name
9408 self.instance = self.cfg.GetInstanceInfo(instance_name)
9409 assert self.instance is not None, \
9410 "Cannot retrieve locked instance %s" % self.op.instance_name
9411 _CheckNodeOnline(self, self.instance.primary_node)
9413 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9414 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9415 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9416 assert self.dst_node is not None
9418 _CheckNodeOnline(self, self.dst_node.name)
9419 _CheckNodeNotDrained(self, self.dst_node.name)
9422 self.dest_disk_info = None
9423 self.dest_x509_ca = None
9425 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9426 self.dst_node = None
9428 if len(self.op.target_node) != len(self.instance.disks):
9429 raise errors.OpPrereqError(("Received destination information for %s"
9430 " disks, but instance %s has %s disks") %
9431 (len(self.op.target_node), instance_name,
9432 len(self.instance.disks)),
9435 cds = _GetClusterDomainSecret()
9437 # Check X509 key name
9439 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9440 except (TypeError, ValueError), err:
9441 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9443 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9444 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9447 # Load and verify CA
9449 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9450 except OpenSSL.crypto.Error, err:
9451 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9452 (err, ), errors.ECODE_INVAL)
9454 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9455 if errcode is not None:
9456 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9457 (msg, ), errors.ECODE_INVAL)
9459 self.dest_x509_ca = cert
      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))
    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
        "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
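  # Note on the return value (illustration only, not from the original
  # source): for a two-disk instance a fully successful export yields
  # (True, [True, True]); callers can inspect the per-disk booleans to
  # report partial failures.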
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    ("instance_name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
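# Example (illustrative, not part of the module): with pattern "^web" and a
# cluster where instance "web1" carries the tag "webfarm", Exec would return
# [("/instances/web1", "webfarm")] -- one (path, tag) pair for every matching
# tag across the cluster, node and instance tag sets.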
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
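# Quick illustration of the CheckPrereq subset logic above (not part of the
# original code): with cur_tags == set(["a", "b"]) and
# del_tags == frozenset(["b", "c"]), "del_tags <= cur_tags" is False and
# "del_tags - cur_tags" is frozenset(["c"]), so the LU refuses the request
# with "Tag(s) 'c' not found" before modifying anything.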
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
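# Illustrative client-side usage (an assumption, not part of this module):
# the LU is driven through its opcode, roughly
#
#   op = opcodes.OpTestDelay(duration=2.0, on_master=True,
#                            on_nodes=[], repeat=3)
#
# which sleeps three times for two seconds on the master only.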
class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")
      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)
        # Send details to client
        cb(tmpsock)
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
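  # Sketch of the client side of this handshake (illustrative only; the
  # real client lives outside this module): after learning the socket path
  # from the job log, a client would do roughly
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # unblocks sock.accept() above
  #   ...                     # observe the notified test step
  #   sock.close()            # unblocks conn.recv(1) above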
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0
  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
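  # Construction sketch (illustrative, not from the original source): an
  # allocation request must pass every _ALLO_KEYS member as a keyword
  # argument, e.g.
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", mem_size=512, disks=[...],
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], nics=[...], vcpus=1, hypervisor=None)
  #
  # Extra or missing keywords raise ProgrammerError, and the serialized
  # input text is built immediately by _BuildInputData.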
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
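  # Rough shape of the structure built by _ComputeClusterData above
  # (illustration only, abridged):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {"node1": {"total_memory": ..., "free_disk": ..., ...}},
  #     "instances": {"inst1": {"vcpus": ..., "disks": [...], ...}},
  #   }
  #
  # The "request" key is added later, in _BuildInputData.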
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
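  # For illustration (inferred from the checks above, abridged): a valid
  # allocator reply deserializes to a dict like
  #
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node2.example.com"]}
  #
  # "result" must be a list; replies from older allocators that use the
  # "nodes" key are rewritten to "result" for backwards compatibility.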
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result