# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging
import operator
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))

#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: a required node group name (for single-group LUs)
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
188 """Returns the SshRunner object
192 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
195 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
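
    A minimal illustrative sketch (the C{mode} attribute and its values
    are made-up names, not part of this base class)::

      def CheckArguments(self):
        if self.op.mode not in ("plain", "mirrored"):
          raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode,
                                     errors.ECODE_INVAL)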

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    # self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.
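
    A typical implementation recalculates the node locks once the instance
    locks are held, as suggested in L{_LockInstancesNodes} (sketch)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()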

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty node list should be returned as an empty list (and not
    None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.
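
    An illustrative sketch (the feedback message is made up)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Post-execution hooks have run")
        return lu_result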

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
450 """Tasklet base class.
452 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
453 they can mix legacy code with tasklets. Locking needs to be done in the LU,
454 tasklets know nothing about locks.
456 Subclasses must follow these rules:
457 - Implement CheckPrereq
461 def __init__(self, lu):
468 def CheckPrereq(self):
469 """Check prerequisites for this tasklets.
471 This method should check whether the prerequisites for the execution of
472 this tasklet are fulfilled. It can do internode communication, but it
473 should be idempotent - no cluster or system changes are allowed.
475 The method should raise errors.OpPrereqError in case something is not
476 fulfilled. Its return value is ignored.
478 This method should also update all parameters to their canonical form if it
479 hasn't been done before.
484 def Exec(self, feedback_fn):
485 """Execute the tasklet.
487 This method should implement the actual work. It should raise
488 errors.OpExecError for failures that are somewhat dealt with in code, or
492 raise NotImplementedError
496 """Base for query utility classes.
499 #: Attribute holding field definitions
502 def __init__(self, names, fields, use_locking):
503 """Initializes this class.
507 self.use_locking = use_locking
509 self.query = query.Query(self.FIELDS, fields)
510 self.requested_data = self.query.RequestedData()
512 self.do_locking = None
515 def _GetNames(self, lu, all_names, lock_level):
516 """Helper function to determine names asked for in the query.
520 names = lu.acquired_locks[lock_level]
524 if self.wanted == locking.ALL_SET:
525 assert not self.names
526 # caller didn't specify names, so ordering is not important
527 return utils.NiceSort(names)
529 # caller specified names and we must keep the same order
531 assert not self.do_locking or lu.acquired_locks[lock_level]
533 missing = set(self.wanted).difference(names)
535 raise errors.OpExecError("Some items were removed before retrieving"
536 " their data: %s" % missing)
538 # Return expanded names
542 def FieldsQuery(cls, fields):
543 """Returns list of available fields.
545 @return: List of L{objects.QueryFieldDefinition}
549 # Client requests all fields, sort by name
550 fdefs = sorted(query.GetAllFields(cls.FIELDS.values()),
551 key=operator.attrgetter("name"))
553 # Keep order as requested by client
554 fdefs = query.Query(cls.FIELDS, fields).GetFields()
556 return objects.QueryFieldsResponse(fields=fdefs).ToDict()
558 def ExpandNames(self, lu):
559 """Expand names for this query.
561 See L{LogicalUnit.ExpandNames}.
564 raise NotImplementedError()
566 def DeclareLocks(self, lu, level):
567 """Declare locks for this query.
569 See L{LogicalUnit.DeclareLocks}.
572 raise NotImplementedError()
574 def _GetQueryData(self, lu):
575 """Collects all data for this query.
577 @return: Query data object
580 raise NotImplementedError()
582 def NewStyleQuery(self, lu):
583 """Collect data and execute query.
586 data = self._GetQueryData(lu)
588 return objects.QueryResponse(data=self.query.Query(data),
589 fields=self.query.GetFields()).ToDict()
591 def OldStyleQuery(self, lu):
592 """Collect data and execute query.
595 return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.
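
  For example, a sketch of a few of the resulting keys (the values shown
  are made up)::

    {
      "OP_TARGET": "instance1.example.com",
      "INSTANCE_NAME": "instance1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      ...
    }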

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
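
    For example, a sketch of the two output styles for the same error
    (the node name is made up)::

      # with op.error_codes: "ERROR:ENODENET:node:node1:cannot reach the master IP"
      # without:             "ERROR: node node1: cannot reach the master IP"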

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1875 def _VerifyNodeOS(self, ninfo, nimg, base):
1876 """Verifies the node OS list.
1878 @type ninfo: L{objects.Node}
1879 @param ninfo: the node to check
1880 @param nimg: the node image object
1881 @param base: the 'template' node we match against (e.g. from the master)
1885 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1889 for os_name, os_data in nimg.oslist.items():
1890 assert os_data, "Empty OS status for OS %s?!" % os_name
1891 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1892 _ErrorIf(not f_status, self.ENODEOS, node,
1893 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1894 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1895 "OS '%s' has multiple entries (first one shadows the rest): %s",
1896 os_name, utils.CommaJoin([v[0] for v in os_data]))
1897 # this will catched in backend too
1898 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1899 and not f_var, self.ENODEOS, node,
1900 "OS %s with API at least %d does not declare any variant",
1901 os_name, constants.OS_API_V15)
1902 # comparisons with the 'base' image
1903 test = os_name not in base.oslist
1904 _ErrorIf(test, self.ENODEOS, node,
1905 "Extra OS %s not present on reference node (%s)",
1909 assert base.oslist[os_name], "Base node has empty OS status?"
1910 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1911 if not b_status:
1912 # base OS is invalid, skipping
1913 continue
1914 for kind, a, b in [("API version", f_api, b_api),
1915 ("variants list", f_var, b_var),
1916 ("parameters", f_param, b_param)]:
1917 _ErrorIf(a != b, self.ENODEOS, node,
1918 "OS %s %s differs from reference node %s: %s vs. %s",
1919 kind, os_name, base.name,
1920 utils.CommaJoin(a), utils.CommaJoin(b))
1922 # check any missing OSes
1923 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1924 _ErrorIf(missing, self.ENODEOS, node,
1925 "OSes present on reference node %s but missing on this node: %s",
1926 base.name, utils.CommaJoin(missing))
1928 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1929 """Verifies and updates the node volume data.
1931 This function will update a L{NodeImage}'s internal structures
1932 with data from the remote call.
1934 @type ninfo: L{objects.Node}
1935 @param ninfo: the node to check
1936 @param nresult: the remote results for the node
1937 @param nimg: the node image object
1938 @param vg_name: the configured VG name
1941 node = ninfo.name
1942 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1944 nimg.lvm_fail = True
1945 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1946 if vg_name is None:
1947 pass
1948 elif isinstance(lvdata, basestring):
1949 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1950 utils.SafeEncode(lvdata))
1951 elif not isinstance(lvdata, dict):
1952 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1953 else:
1954 nimg.volumes = lvdata
1955 nimg.lvm_fail = False
1957 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1958 """Verifies and updates the node instance list.
1960 If the listing was successful, then updates this node's instance
1961 list. Otherwise, it marks the RPC call as failed for the instance
1962 list key.
1964 @type ninfo: L{objects.Node}
1965 @param ninfo: the node to check
1966 @param nresult: the remote results for the node
1967 @param nimg: the node image object
1970 idata = nresult.get(constants.NV_INSTANCELIST, None)
1971 test = not isinstance(idata, list)
1972 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1973 " (instancelist): %s", utils.SafeEncode(str(idata)))
1974 if test:
1975 nimg.hyp_fail = True
1976 else:
1977 nimg.instances = idata
1979 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1980 """Verifies and computes a node information map
1982 @type ninfo: L{objects.Node}
1983 @param ninfo: the node to check
1984 @param nresult: the remote results for the node
1985 @param nimg: the node image object
1986 @param vg_name: the configured VG name
1989 node = ninfo.name
1990 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1992 # try to read free memory (from the hypervisor)
1993 hv_info = nresult.get(constants.NV_HVINFO, None)
1994 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1995 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1996 if not test:
1997 try:
1998 nimg.mfree = int(hv_info["memory_free"])
1999 except (ValueError, TypeError):
2000 _ErrorIf(True, self.ENODERPC, node,
2001 "node returned invalid nodeinfo, check hypervisor")
2003 # FIXME: devise a free space model for file based instances as well
2004 if vg_name is not None:
2005 test = (constants.NV_VGLIST not in nresult or
2006 vg_name not in nresult[constants.NV_VGLIST])
2007 _ErrorIf(test, self.ENODELVM, node,
2008 "node didn't return data for the volume group '%s'"
2009 " - it is either missing or broken", vg_name)
2010 if not test:
2011 try:
2012 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2013 except (ValueError, TypeError):
2014 _ErrorIf(True, self.ENODERPC, node,
2015 "node returned invalid LVM info, check LVM status")
2017 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2018 """Gets per-disk status information for all instances.
2020 @type nodelist: list of strings
2021 @param nodelist: Node names
2022 @type node_image: dict of (name, L{objects.Node})
2023 @param node_image: Node objects
2024 @type instanceinfo: dict of (name, L{objects.Instance})
2025 @param instanceinfo: Instance objects
2028 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2030 node_disks = {}
2031 node_disks_devonly = {}
2033 for nname in nodelist:
2034 disks = [(inst, disk)
2035 for instlist in [node_image[nname].pinst,
2036 node_image[nname].sinst]
2037 for inst in instlist
2038 for disk in instanceinfo[inst].disks]
2040 if not disks:
2041 # No need to collect data
2042 continue
2044 node_disks[nname] = disks
2046 # Creating copies as SetDiskID below will modify the objects and that can
2047 # lead to incorrect data returned from nodes
2048 devonly = [dev.Copy() for (_, dev) in disks]
2050 for dev in devonly:
2051 self.cfg.SetDiskID(dev, nname)
2053 node_disks_devonly[nname] = devonly
2055 assert len(node_disks) == len(node_disks_devonly)
2057 # Collect data from all nodes with disks
2058 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2059 node_disks_devonly)
2061 assert len(result) == len(node_disks)
2063 instdisk = {}
2065 for (nname, nres) in result.items():
2066 if nres.offline:
2067 # Ignore offline node
2068 continue
2070 disks = node_disks[nname]
2072 msg = nres.fail_msg
2073 _ErrorIf(msg, self.ENODERPC, nname,
2074 "while getting disk information: %s", nres.fail_msg)
2075 if msg:
2076 # No data from this node
2077 data = len(disks) * [None]
2078 else:
2079 data = nres.payload
2081 for ((inst, _), status) in zip(disks, data):
2082 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2084 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2085 len(nnames) <= len(instanceinfo[inst].all_nodes)
2086 for inst, nnames in instdisk.items()
2087 for nname, statuses in nnames.items())
2089 return instdisk
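# Illustrative sketch (not part of the original module): instdisk maps
# instance -> node -> list of per-disk status results (None entries when the
# node returned no data), e.g. for a hypothetical two-disk DRBD instance
# "inst1" living on "node1"/"node2":
#
#   instdisk = {"inst1": {"node1": [status_disk0, status_disk1],
#                         "node2": [status_disk0, status_disk1]}}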
2091 def BuildHooksEnv(self):
2092 """Build hooks env.
2094 Cluster-Verify hooks run in the post phase only; when they fail, their
2095 output is logged in the verify output and the verification fails.
2098 all_nodes = self.cfg.GetNodeList()
2100 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2102 for node in self.cfg.GetAllNodesInfo().values():
2103 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2105 return env, [], all_nodes
2107 def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes.
2111 self.bad = False
2112 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2113 verbose = self.op.verbose
2114 self._feedback_fn = feedback_fn
2115 feedback_fn("* Verifying global settings")
2116 for msg in self.cfg.VerifyConfig():
2117 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2119 # Check the cluster certificates
2120 for cert_filename in constants.ALL_CERT_FILES:
2121 (errcode, msg) = _VerifyCertificate(cert_filename)
2122 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2124 vg_name = self.cfg.GetVGName()
2125 drbd_helper = self.cfg.GetDRBDHelper()
2126 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2127 cluster = self.cfg.GetClusterInfo()
2128 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2129 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2130 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2131 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2132 for iname in instancelist)
2133 i_non_redundant = [] # Non redundant instances
2134 i_non_a_balanced = [] # Non auto-balanced instances
2135 n_offline = 0 # Count of offline nodes
2136 n_drained = 0 # Count of nodes being drained
2137 node_vol_should = {}
2139 # FIXME: verify OS list
2140 # do local checksums
2141 master_files = [constants.CLUSTER_CONF_FILE]
2142 master_node = self.master_node = self.cfg.GetMasterNode()
2143 master_ip = self.cfg.GetMasterIP()
2145 file_names = ssconf.SimpleStore().GetFileList()
2146 file_names.extend(constants.ALL_CERT_FILES)
2147 file_names.extend(master_files)
2148 if cluster.modify_etc_hosts:
2149 file_names.append(constants.ETC_HOSTS)
2151 local_checksums = utils.FingerprintFiles(file_names)
2153 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2154 node_verify_param = {
2155 constants.NV_FILELIST: file_names,
2156 constants.NV_NODELIST: [node.name for node in nodeinfo
2157 if not node.offline],
2158 constants.NV_HYPERVISOR: hypervisors,
2159 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2160 node.secondary_ip) for node in nodeinfo
2161 if not node.offline],
2162 constants.NV_INSTANCELIST: hypervisors,
2163 constants.NV_VERSION: None,
2164 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2165 constants.NV_NODESETUP: None,
2166 constants.NV_TIME: None,
2167 constants.NV_MASTERIP: (master_node, master_ip),
2168 constants.NV_OSLIST: None,
2169 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2170 }
2172 if vg_name is not None:
2173 node_verify_param[constants.NV_VGLIST] = None
2174 node_verify_param[constants.NV_LVLIST] = vg_name
2175 node_verify_param[constants.NV_PVLIST] = [vg_name]
2176 node_verify_param[constants.NV_DRBDLIST] = None
2179 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2181 # Build our expected cluster state
2182 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2183 name=node.name,
2184 vm_capable=node.vm_capable))
2185 for node in nodeinfo)
2187 for instance in instancelist:
2188 inst_config = instanceinfo[instance]
2190 for nname in inst_config.all_nodes:
2191 if nname not in node_image:
2192 # ghost node
2193 gnode = self.NodeImage(name=nname)
2194 gnode.ghost = True
2195 node_image[nname] = gnode
2197 inst_config.MapLVsByNode(node_vol_should)
2199 pnode = inst_config.primary_node
2200 node_image[pnode].pinst.append(instance)
2202 for snode in inst_config.secondary_nodes:
2203 nimg = node_image[snode]
2204 nimg.sinst.append(instance)
2205 if pnode not in nimg.sbp:
2206 nimg.sbp[pnode] = []
2207 nimg.sbp[pnode].append(instance)
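# Illustrative sketch (not part of the original module): for a hypothetical
# DRBD instance "inst1" with primary "node1" and secondary "node2", the node
# images now satisfy:
#
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp == {"node1": ["inst1"]}  # secondaries by primary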
2209 # At this point, we have the in-memory data structures complete,
2210 # except for the runtime information, which we'll gather next
2212 # Due to the way our RPC system works, exact response times cannot be
2213 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2214 # time before and after executing the request, we can at least have a time
2215 # window.
2216 nvinfo_starttime = time.time()
2217 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2218 self.cfg.GetClusterName())
2219 nvinfo_endtime = time.time()
2221 all_drbd_map = self.cfg.ComputeDRBDMap()
2223 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2224 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2226 feedback_fn("* Verifying node status")
2230 for node_i in nodeinfo:
2231 node = node_i.name
2232 nimg = node_image[node]
2236 feedback_fn("* Skipping offline node %s" % (node,))
2240 if node == master_node:
2241 ntype = "master"
2242 elif node_i.master_candidate:
2243 ntype = "master candidate"
2244 elif node_i.drained:
2245 ntype = "drained"
2246 n_drained += 1
2247 else:
2248 ntype = "regular"
2249 if verbose:
2250 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2252 msg = all_nvinfo[node].fail_msg
2253 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2254 if msg:
2255 nimg.rpc_fail = True
2256 continue
2258 nresult = all_nvinfo[node].payload
2260 nimg.call_ok = self._VerifyNode(node_i, nresult)
2261 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2262 self._VerifyNodeNetwork(node_i, nresult)
2263 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2264 master_files)
2266 if nimg.vm_capable:
2267 self._VerifyNodeLVM(node_i, nresult, vg_name)
2268 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2269 all_drbd_map)
2271 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2272 self._UpdateNodeInstances(node_i, nresult, nimg)
2273 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2274 self._UpdateNodeOS(node_i, nresult, nimg)
2275 if not nimg.os_fail:
2276 if refos_img is None:
2277 refos_img = nimg
2278 self._VerifyNodeOS(node_i, nimg, refos_img)
2280 feedback_fn("* Verifying instance status")
2281 for instance in instancelist:
2283 feedback_fn("* Verifying instance %s" % instance)
2284 inst_config = instanceinfo[instance]
2285 self._VerifyInstance(instance, inst_config, node_image,
2286 instdisk[instance])
2287 inst_nodes_offline = []
2289 pnode = inst_config.primary_node
2290 pnode_img = node_image[pnode]
2291 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2292 self.ENODERPC, pnode, "instance %s, connection to"
2293 " primary node failed", instance)
2295 if pnode_img.offline:
2296 inst_nodes_offline.append(pnode)
2298 # If the instance is non-redundant we cannot survive losing its primary
2299 # node, so we are not N+1 compliant. On the other hand we have no disk
2300 # templates with more than one secondary so that situation is not well
2301 # supported either.
2302 # FIXME: does not support file-backed instances
2303 if not inst_config.secondary_nodes:
2304 i_non_redundant.append(instance)
2305 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2306 instance, "instance has multiple secondary nodes: %s",
2307 utils.CommaJoin(inst_config.secondary_nodes),
2308 code=self.ETYPE_WARNING)
2310 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2311 i_non_a_balanced.append(instance)
2313 for snode in inst_config.secondary_nodes:
2314 s_img = node_image[snode]
2315 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2316 "instance %s, connection to secondary node failed", instance)
2318 if s_img.offline:
2319 inst_nodes_offline.append(snode)
2321 # warn that the instance lives on offline nodes
2322 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2323 "instance lives on offline node(s) %s",
2324 utils.CommaJoin(inst_nodes_offline))
2325 # ... or ghost/non-vm_capable nodes
2326 for node in inst_config.all_nodes:
2327 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2328 "instance lives on ghost node %s", node)
2329 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2330 instance, "instance lives on non-vm_capable node %s", node)
2332 feedback_fn("* Verifying orphan volumes")
2333 reserved = utils.FieldSet(*cluster.reserved_lvs)
2334 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2336 feedback_fn("* Verifying orphan instances")
2337 self._VerifyOrphanInstances(instancelist, node_image)
2339 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2340 feedback_fn("* Verifying N+1 Memory redundancy")
2341 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2343 feedback_fn("* Other Notes")
2345 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2346 % len(i_non_redundant))
2348 if i_non_a_balanced:
2349 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2350 % len(i_non_a_balanced))
2353 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2356 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2360 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2361 """Analyze the post-hooks' result
2363 This method analyses the hook result, handles it, and sends some
2364 nicely-formatted feedback back to the user.
2366 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2367 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2368 @param hooks_results: the results of the multi-node hooks rpc call
2369 @param feedback_fn: function used to send feedback back to the caller
2370 @param lu_result: previous Exec result
2371 @return: the new Exec result, based on the previous result
2375 # We only really run POST phase hooks, and are only interested in
2376 # their results.
2377 if phase == constants.HOOKS_PHASE_POST:
2378 # Used to change hooks' output to proper indentation
2379 feedback_fn("* Hooks Results")
2380 assert hooks_results, "invalid result from hooks"
2382 for node_name in hooks_results:
2383 res = hooks_results[node_name]
2384 msg = res.fail_msg
2385 test = msg and not res.offline
2386 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2387 "Communication failure in hooks execution: %s", msg)
2388 if res.offline or msg:
2389 # No need to investigate payload if node is offline or gave an error.
2390 # manually override lu_result here, as _ErrorIf only
2391 # overrides self.bad
2392 lu_result = 1
2393 continue
2394 for script, hkr, output in res.payload:
2395 test = hkr == constants.HKR_FAIL
2396 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2397 "Script %s failed, output:", script)
2398 if test:
2399 output = self._HOOKS_INDENT_RE.sub(' ', output)
2400 feedback_fn("%s" % output)
2401 lu_result = 0
2403 return lu_result
2406 class LUVerifyDisks(NoHooksLU):
2407 """Verifies the cluster disks status.
2412 def ExpandNames(self):
2413 self.needed_locks = {
2414 locking.LEVEL_NODE: locking.ALL_SET,
2415 locking.LEVEL_INSTANCE: locking.ALL_SET,
2416 }
2417 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2419 def Exec(self, feedback_fn):
2420 """Verify integrity of cluster disks.
2422 @rtype: tuple of three items
2423 @return: a tuple of (dict of node-to-node_error, list of instances
2424 which need activate-disks, dict of instance: (node, volume) for
2425 missing volumes)
2428 result = res_nodes, res_instances, res_missing = {}, [], {}
2430 nodes = utils.NiceSort(self.cfg.GetNodeList())
2431 instances = [self.cfg.GetInstanceInfo(name)
2432 for name in self.cfg.GetInstanceList()]
2434 nv_dict = {}
2435 for inst in instances:
2436 inst_lvs = {}
2437 if (not inst.admin_up or
2438 inst.disk_template not in constants.DTS_NET_MIRROR):
2439 continue
2440 inst.MapLVsByNode(inst_lvs)
2441 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2442 for node, vol_list in inst_lvs.iteritems():
2443 for vol in vol_list:
2444 nv_dict[(node, vol)] = inst
2449 vg_names = self.rpc.call_vg_list(nodes)
2450 vg_names.Raise("Cannot get list of VGs")
2452 for node in nodes:
2454 node_res = self.rpc.call_lv_list([node],
2455 vg_names[node].payload.keys())[node]
2456 if node_res.offline:
2457 continue
2458 msg = node_res.fail_msg
2459 if msg:
2460 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2461 res_nodes[node] = msg
2462 continue
2464 lvs = node_res.payload
2465 for lv_name, (_, _, lv_online) in lvs.items():
2466 inst = nv_dict.pop((node, lv_name), None)
2467 if (not lv_online and inst is not None
2468 and inst.name not in res_instances):
2469 res_instances.append(inst.name)
2471 # any leftover items in nv_dict are missing LVs, let's arrange the
2473 for key, inst in nv_dict.iteritems():
2474 if inst.name not in res_missing:
2475 res_missing[inst.name] = []
2476 res_missing[inst.name].append(key)
2478 return result
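# Illustrative sketch (not part of the original module): a possible return
# value, for hypothetical nodes/instances where "node2" failed the LV
# listing, "inst2" has offline LVs and "inst1" has a volume missing on
# "node1":
#
#   ({"node2": "error message"},             # node -> enumeration error
#    ["inst2"],                              # instances needing activate-disks
#    {"inst1": [("node1", "xenvg/disk0")]})  # instance -> missing (node, LV)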
2481 class LURepairDiskSizes(NoHooksLU):
2482 """Verifies the cluster disks sizes.
2485 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2488 def ExpandNames(self):
2489 if self.op.instances:
2490 self.wanted_names = []
2491 for name in self.op.instances:
2492 full_name = _ExpandInstanceName(self.cfg, name)
2493 self.wanted_names.append(full_name)
2494 self.needed_locks = {
2495 locking.LEVEL_NODE: [],
2496 locking.LEVEL_INSTANCE: self.wanted_names,
2497 }
2498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2499 else:
2500 self.wanted_names = None
2501 self.needed_locks = {
2502 locking.LEVEL_NODE: locking.ALL_SET,
2503 locking.LEVEL_INSTANCE: locking.ALL_SET,
2504 }
2505 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2507 def DeclareLocks(self, level):
2508 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2509 self._LockInstancesNodes(primary_only=True)
2511 def CheckPrereq(self):
2512 """Check prerequisites.
2514 This only checks the optional instance list against the existing names.
2517 if self.wanted_names is None:
2518 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2520 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2521 in self.wanted_names]
2523 def _EnsureChildSizes(self, disk):
2524 """Ensure children of the disk have the needed disk size.
2526 This is valid mainly for DRBD8 and fixes an issue where the
2527 children have smaller disk size.
2529 @param disk: an L{ganeti.objects.Disk} object
2532 if disk.dev_type == constants.LD_DRBD8:
2533 assert disk.children, "Empty children for DRBD8?"
2534 fchild = disk.children[0]
2535 mismatch = fchild.size < disk.size
2536 if mismatch:
2537 self.LogInfo("Child disk has size %d, parent %d, fixing",
2538 fchild.size, disk.size)
2539 fchild.size = disk.size
2541 # and we recurse on this child only, not on the metadev
2542 return self._EnsureChildSizes(fchild) or mismatch
2543 else:
2544 return False
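# Illustrative sketch (not part of the original module): for a hypothetical
# DRBD8 disk of size 1024 MiB whose data child reports only 1000 MiB, the
# method above grows the child and reports the mismatch:
#
#   disk = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[data_child, meta_child])  # data_child.size == 1000
#   self._EnsureChildSizes(disk)  # returns True; data_child.size is now 1024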
2546 def Exec(self, feedback_fn):
2547 """Verify the size of cluster disks.
2550 # TODO: check child disks too
2551 # TODO: check differences in size between primary/secondary nodes
2552 per_node_disks = {}
2553 for instance in self.wanted_instances:
2554 pnode = instance.primary_node
2555 if pnode not in per_node_disks:
2556 per_node_disks[pnode] = []
2557 for idx, disk in enumerate(instance.disks):
2558 per_node_disks[pnode].append((instance, idx, disk))
2560 changed = []
2561 for node, dskl in per_node_disks.items():
2562 newl = [v[2].Copy() for v in dskl]
2563 for dsk in newl:
2564 self.cfg.SetDiskID(dsk, node)
2565 result = self.rpc.call_blockdev_getsizes(node, newl)
2567 self.LogWarning("Failure in blockdev_getsizes call to node"
2568 " %s, ignoring", node)
2570 if len(result.data) != len(dskl):
2571 self.LogWarning("Invalid result from node %s, ignoring node results",
2574 for ((instance, idx, disk), size) in zip(dskl, result.data):
2576 self.LogWarning("Disk %d of instance %s did not return size"
2577 " information, ignoring", idx, instance.name)
2578 continue
2579 if not isinstance(size, (int, long)):
2580 self.LogWarning("Disk %d of instance %s did not return valid"
2581 " size information, ignoring", idx, instance.name)
2582 continue
2583 size = size >> 20
2584 if size != disk.size:
2585 self.LogInfo("Disk %d of instance %s has mismatched size,"
2586 " correcting: recorded %d, actual %d", idx,
2587 instance.name, disk.size, size)
2588 disk.size = size
2589 self.cfg.Update(instance, feedback_fn)
2590 changed.append((instance.name, idx, size))
2591 if self._EnsureChildSizes(disk):
2592 self.cfg.Update(instance, feedback_fn)
2593 changed.append((instance.name, idx, disk.size))
2595 return changed
2597 class LURenameCluster(LogicalUnit):
2598 """Rename the cluster.
2601 HPATH = "cluster-rename"
2602 HTYPE = constants.HTYPE_CLUSTER
2603 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2605 def BuildHooksEnv(self):
2610 "OP_TARGET": self.cfg.GetClusterName(),
2611 "NEW_NAME": self.op.name,
2613 mn = self.cfg.GetMasterNode()
2614 all_nodes = self.cfg.GetNodeList()
2615 return env, [mn], all_nodes
2617 def CheckPrereq(self):
2618 """Verify that the passed name is a valid one.
2621 hostname = netutils.GetHostname(name=self.op.name,
2622 family=self.cfg.GetPrimaryIPFamily())
2624 new_name = hostname.name
2625 self.ip = new_ip = hostname.ip
2626 old_name = self.cfg.GetClusterName()
2627 old_ip = self.cfg.GetMasterIP()
2628 if new_name == old_name and new_ip == old_ip:
2629 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2630 " cluster has changed",
2632 if new_ip != old_ip:
2633 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2634 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2635 " reachable on the network" %
2636 new_ip, errors.ECODE_NOTUNIQUE)
2638 self.op.name = new_name
2640 def Exec(self, feedback_fn):
2641 """Rename the cluster.
2644 clustername = self.op.name
2645 ip = self.ip
2647 # shutdown the master IP
2648 master = self.cfg.GetMasterNode()
2649 result = self.rpc.call_node_stop_master(master, False)
2650 result.Raise("Could not disable the master role")
2652 try:
2653 cluster = self.cfg.GetClusterInfo()
2654 cluster.cluster_name = clustername
2655 cluster.master_ip = ip
2656 self.cfg.Update(cluster, feedback_fn)
2658 # update the known hosts file
2659 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2660 node_list = self.cfg.GetOnlineNodeList()
2661 try:
2662 node_list.remove(master)
2663 except ValueError:
2664 pass
2665 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2666 finally:
2667 result = self.rpc.call_node_start_master(master, False, False)
2668 msg = result.fail_msg
2669 if msg:
2670 self.LogWarning("Could not re-enable the master role on"
2671 " the master, please restart manually: %s", msg)
2673 return clustername
2676 class LUSetClusterParams(LogicalUnit):
2677 """Change the parameters of the cluster.
2680 HPATH = "cluster-modify"
2681 HTYPE = constants.HTYPE_CLUSTER
2683 ("vg_name", None, ht.TMaybeString),
2684 ("enabled_hypervisors", None,
2685 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2687 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2689 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2690 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2692 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2694 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2695 ("uid_pool", None, ht.NoType),
2696 ("add_uids", None, ht.NoType),
2697 ("remove_uids", None, ht.NoType),
2698 ("maintain_node_health", None, ht.TMaybeBool),
2699 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2700 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2701 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2702 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2703 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2704 ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
2705 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2706 ("hidden_os", None, ht.TOr(ht.TListOf(\
2709 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2711 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2714 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
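# Illustrative note (not part of the original module): every triple above is
# (attribute name, default value, type check); ht.NoDefault marks an
# attribute as mandatory. A hypothetical optional string-list parameter
# would be declared as:
#
#   ("example_param", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone))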
2719 def CheckArguments(self):
2723 if self.op.uid_pool:
2724 uidpool.CheckUidPool(self.op.uid_pool)
2726 if self.op.add_uids:
2727 uidpool.CheckUidPool(self.op.add_uids)
2729 if self.op.remove_uids:
2730 uidpool.CheckUidPool(self.op.remove_uids)
2732 def ExpandNames(self):
2733 # FIXME: in the future maybe other cluster params won't require checking on
2734 # all nodes to be modified.
2735 self.needed_locks = {
2736 locking.LEVEL_NODE: locking.ALL_SET,
2737 }
2738 self.share_locks[locking.LEVEL_NODE] = 1
2740 def BuildHooksEnv(self):
2745 "OP_TARGET": self.cfg.GetClusterName(),
2746 "NEW_VG_NAME": self.op.vg_name,
2748 mn = self.cfg.GetMasterNode()
2749 return env, [mn], [mn]
2751 def CheckPrereq(self):
2752 """Check prerequisites.
2754 This checks whether the given params don't conflict and
2755 if the given volume group is valid.
2758 if self.op.vg_name is not None and not self.op.vg_name:
2759 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2760 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2761 " instances exist", errors.ECODE_INVAL)
2763 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2764 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2765 raise errors.OpPrereqError("Cannot disable drbd helper while"
2766 " drbd-based instances exist",
2769 node_list = self.acquired_locks[locking.LEVEL_NODE]
2771 # if vg_name not None, checks given volume group on all nodes
2772 if self.op.vg_name:
2773 vglist = self.rpc.call_vg_list(node_list)
2774 for node in node_list:
2775 msg = vglist[node].fail_msg
2776 if msg:
2777 # ignoring down node
2778 self.LogWarning("Error while gathering data on node %s"
2779 " (ignoring node): %s", node, msg)
2781 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2782 self.op.vg_name,
2783 constants.MIN_VG_SIZE)
2785 raise errors.OpPrereqError("Error on node '%s': %s" %
2786 (node, vgstatus), errors.ECODE_ENVIRON)
2788 if self.op.drbd_helper:
2789 # checks given drbd helper on all nodes
2790 helpers = self.rpc.call_drbd_helper(node_list)
2791 for node in node_list:
2792 ninfo = self.cfg.GetNodeInfo(node)
2794 self.LogInfo("Not checking drbd helper on offline node %s", node)
2796 msg = helpers[node].fail_msg
2798 raise errors.OpPrereqError("Error checking drbd helper on node"
2799 " '%s': %s" % (node, msg),
2800 errors.ECODE_ENVIRON)
2801 node_helper = helpers[node].payload
2802 if node_helper != self.op.drbd_helper:
2803 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2804 (node, node_helper), errors.ECODE_ENVIRON)
2806 self.cluster = cluster = self.cfg.GetClusterInfo()
2807 # validate params changes
2808 if self.op.beparams:
2809 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2810 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2812 if self.op.ndparams:
2813 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2814 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2816 if self.op.nicparams:
2817 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2818 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2819 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2820 nic_errors = []
2822 # check all instances for consistency
2823 for instance in self.cfg.GetAllInstancesInfo().values():
2824 for nic_idx, nic in enumerate(instance.nics):
2825 params_copy = copy.deepcopy(nic.nicparams)
2826 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2828 # check parameter syntax
2829 try:
2830 objects.NIC.CheckParameterSyntax(params_filled)
2831 except errors.ConfigurationError, err:
2832 nic_errors.append("Instance %s, nic/%d: %s" %
2833 (instance.name, nic_idx, err))
2835 # if we're moving instances to routed, check that they have an ip
2836 target_mode = params_filled[constants.NIC_MODE]
2837 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2838 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2839 (instance.name, nic_idx))
2841 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2842 "\n".join(nic_errors))
2844 # hypervisor list/parameters
2845 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2846 if self.op.hvparams:
2847 for hv_name, hv_dict in self.op.hvparams.items():
2848 if hv_name not in self.new_hvparams:
2849 self.new_hvparams[hv_name] = hv_dict
2850 else:
2851 self.new_hvparams[hv_name].update(hv_dict)
2853 # os hypervisor parameters
2854 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2855 if self.op.os_hvp:
2856 for os_name, hvs in self.op.os_hvp.items():
2857 if os_name not in self.new_os_hvp:
2858 self.new_os_hvp[os_name] = hvs
2859 else:
2860 for hv_name, hv_dict in hvs.items():
2861 if hv_name not in self.new_os_hvp[os_name]:
2862 self.new_os_hvp[os_name][hv_name] = hv_dict
2863 else:
2864 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2867 self.new_osp = objects.FillDict(cluster.osparams, {})
2868 if self.op.osparams:
2869 for os_name, osp in self.op.osparams.items():
2870 if os_name not in self.new_osp:
2871 self.new_osp[os_name] = {}
2873 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2874 use_none=True)
2876 if not self.new_osp[os_name]:
2877 # we removed all parameters
2878 del self.new_osp[os_name]
2879 else:
2880 # check the parameter validity (remote check)
2881 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2882 os_name, self.new_osp[os_name])
2884 # changes to the hypervisor list
2885 if self.op.enabled_hypervisors is not None:
2886 self.hv_list = self.op.enabled_hypervisors
2887 for hv in self.hv_list:
2888 # if the hypervisor doesn't already exist in the cluster
2889 # hvparams, we initialize it to empty, and then (in both
2890 # cases) we make sure to fill the defaults, as we might not
2891 # have a complete defaults list if the hypervisor wasn't
2893 if hv not in new_hvp:
2894 new_hvp[hv] = {}
2895 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2896 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2897 else:
2898 self.hv_list = cluster.enabled_hypervisors
2900 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2901 # either the enabled list has changed, or the parameters have, validate
2902 for hv_name, hv_params in self.new_hvparams.items():
2903 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2904 (self.op.enabled_hypervisors and
2905 hv_name in self.op.enabled_hypervisors)):
2906 # either this is a new hypervisor, or its parameters have changed
2907 hv_class = hypervisor.GetHypervisor(hv_name)
2908 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2909 hv_class.CheckParameterSyntax(hv_params)
2910 _CheckHVParams(self, node_list, hv_name, hv_params)
2912 if self.op.os_hvp:
2913 # no need to check any newly-enabled hypervisors, since the
2914 # defaults have already been checked in the above code-block
2915 for os_name, os_hvp in self.new_os_hvp.items():
2916 for hv_name, hv_params in os_hvp.items():
2917 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2918 # we need to fill in the new os_hvp on top of the actual hv_p
2919 cluster_defaults = self.new_hvparams.get(hv_name, {})
2920 new_osp = objects.FillDict(cluster_defaults, hv_params)
2921 hv_class = hypervisor.GetHypervisor(hv_name)
2922 hv_class.CheckParameterSyntax(new_osp)
2923 _CheckHVParams(self, node_list, hv_name, new_osp)
2925 if self.op.default_iallocator:
2926 alloc_script = utils.FindFile(self.op.default_iallocator,
2927 constants.IALLOCATOR_SEARCH_PATH,
2928 os.path.isfile)
2929 if alloc_script is None:
2930 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2931 " specified" % self.op.default_iallocator,
2934 def Exec(self, feedback_fn):
2935 """Change the parameters of the cluster.
2938 if self.op.vg_name is not None:
2939 new_volume = self.op.vg_name
2940 if not new_volume:
2941 new_volume = None
2942 if new_volume != self.cfg.GetVGName():
2943 self.cfg.SetVGName(new_volume)
2944 else:
2945 feedback_fn("Cluster LVM configuration already in desired"
2946 " state, not changing")
2947 if self.op.drbd_helper is not None:
2948 new_helper = self.op.drbd_helper
2949 if not new_helper:
2950 new_helper = None
2951 if new_helper != self.cfg.GetDRBDHelper():
2952 self.cfg.SetDRBDHelper(new_helper)
2953 else:
2954 feedback_fn("Cluster DRBD helper already in desired state,"
2955 " not changing")
2956 if self.op.hvparams:
2957 self.cluster.hvparams = self.new_hvparams
2958 if self.op.os_hvp:
2959 self.cluster.os_hvp = self.new_os_hvp
2960 if self.op.enabled_hypervisors is not None:
2961 self.cluster.hvparams = self.new_hvparams
2962 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2963 if self.op.beparams:
2964 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2965 if self.op.nicparams:
2966 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2967 if self.op.osparams:
2968 self.cluster.osparams = self.new_osp
2969 if self.op.ndparams:
2970 self.cluster.ndparams = self.new_ndparams
2972 if self.op.candidate_pool_size is not None:
2973 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2974 # we need to update the pool size here, otherwise the save will fail
2975 _AdjustCandidatePool(self, [])
2977 if self.op.maintain_node_health is not None:
2978 self.cluster.maintain_node_health = self.op.maintain_node_health
2980 if self.op.prealloc_wipe_disks is not None:
2981 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2983 if self.op.add_uids is not None:
2984 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2986 if self.op.remove_uids is not None:
2987 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2989 if self.op.uid_pool is not None:
2990 self.cluster.uid_pool = self.op.uid_pool
2992 if self.op.default_iallocator is not None:
2993 self.cluster.default_iallocator = self.op.default_iallocator
2995 if self.op.reserved_lvs is not None:
2996 self.cluster.reserved_lvs = self.op.reserved_lvs
2998 def helper_os(aname, mods, desc):
2999 desc += " OS list"
3000 lst = getattr(self.cluster, aname)
3001 for key, val in mods:
3002 if key == constants.DDM_ADD:
3003 if val in lst:
3004 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3005 else:
3006 lst.append(val)
3007 elif key == constants.DDM_REMOVE:
3008 if val in lst:
3009 lst.remove(val)
3010 else:
3011 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3012 else:
3013 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3015 if self.op.hidden_os:
3016 helper_os("hidden_os", self.op.hidden_os, "hidden")
3018 if self.op.blacklisted_os:
3019 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3021 if self.op.master_netdev:
3022 master = self.cfg.GetMasterNode()
3023 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3024 self.cluster.master_netdev)
3025 result = self.rpc.call_node_stop_master(master, False)
3026 result.Raise("Could not disable the master ip")
3027 feedback_fn("Changing master_netdev from %s to %s" %
3028 (self.cluster.master_netdev, self.op.master_netdev))
3029 self.cluster.master_netdev = self.op.master_netdev
3031 self.cfg.Update(self.cluster, feedback_fn)
3033 if self.op.master_netdev:
3034 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3035 self.op.master_netdev)
3036 result = self.rpc.call_node_start_master(master, False, False)
3038 self.LogWarning("Could not re-enable the master ip on"
3039 " the master, please restart manually: %s",
3043 def _UploadHelper(lu, nodes, fname):
3044 """Helper for uploading a file and showing warnings.
3047 if os.path.exists(fname):
3048 result = lu.rpc.call_upload_file(nodes, fname)
3049 for to_node, to_result in result.items():
3050 msg = to_result.fail_msg
3052 msg = ("Copy of file %s to node %s failed: %s" %
3053 (fname, to_node, msg))
3054 lu.proc.LogWarning(msg)
3057 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3058 """Distribute additional files which are part of the cluster configuration.
3060 ConfigWriter takes care of distributing the config and ssconf files, but
3061 there are more files which should be distributed to all nodes. This function
3062 makes sure those are copied.
3064 @param lu: calling logical unit
3065 @param additional_nodes: list of nodes not in the config to distribute to
3066 @type additional_vm: boolean
3067 @param additional_vm: whether the additional nodes are vm-capable or not
3070 # 1. Gather target nodes
3071 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3072 dist_nodes = lu.cfg.GetOnlineNodeList()
3073 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3074 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3075 if additional_nodes is not None:
3076 dist_nodes.extend(additional_nodes)
3077 if additional_vm:
3078 vm_nodes.extend(additional_nodes)
3079 if myself.name in dist_nodes:
3080 dist_nodes.remove(myself.name)
3081 if myself.name in vm_nodes:
3082 vm_nodes.remove(myself.name)
3084 # 2. Gather files to distribute
3085 dist_files = set([constants.ETC_HOSTS,
3086 constants.SSH_KNOWN_HOSTS_FILE,
3087 constants.RAPI_CERT_FILE,
3088 constants.RAPI_USERS_FILE,
3089 constants.CONFD_HMAC_KEY,
3090 constants.CLUSTER_DOMAIN_SECRET_FILE,
3091 ])
3093 vm_files = set()
3094 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3095 for hv_name in enabled_hypervisors:
3096 hv_class = hypervisor.GetHypervisor(hv_name)
3097 vm_files.update(hv_class.GetAncillaryFiles())
3099 # 3. Perform the files upload
3100 for fname in dist_files:
3101 _UploadHelper(lu, dist_nodes, fname)
3102 for fname in vm_files:
3103 _UploadHelper(lu, vm_nodes, fname)
3106 class LURedistributeConfig(NoHooksLU):
3107 """Force the redistribution of cluster configuration.
3109 This is a very simple LU.
3114 def ExpandNames(self):
3115 self.needed_locks = {
3116 locking.LEVEL_NODE: locking.ALL_SET,
3117 }
3118 self.share_locks[locking.LEVEL_NODE] = 1
3120 def Exec(self, feedback_fn):
3121 """Redistribute the configuration.
3124 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3125 _RedistributeAncillaryFiles(self)
3128 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3129 """Sleep and poll for an instance's disk to sync.
3132 if not instance.disks or disks is not None and not disks:
3133 return True
3135 disks = _ExpandCheckDisks(instance, disks)
3138 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3140 node = instance.primary_node
3142 for dev in disks:
3143 lu.cfg.SetDiskID(dev, node)
3145 # TODO: Convert to utils.Retry
3147 retries = 0
3148 degr_retries = 10 # in seconds, as we sleep 1 second each time
3149 while True:
3150 max_time = 0
3151 done = True
3152 cumul_degraded = False
3153 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3154 msg = rstats.fail_msg
3156 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3159 raise errors.RemoteError("Can't contact node %s for mirror data,"
3160 " aborting." % node)
3163 rstats = rstats.payload
3164 retries = 0
3165 for i, mstat in enumerate(rstats):
3166 if mstat is None:
3167 lu.LogWarning("Can't compute data for node %s/%s",
3168 node, disks[i].iv_name)
3169 continue
3171 cumul_degraded = (cumul_degraded or
3172 (mstat.is_degraded and mstat.sync_percent is None))
3173 if mstat.sync_percent is not None:
3174 done = False
3175 if mstat.estimated_time is not None:
3176 rem_time = ("%s remaining (estimated)" %
3177 utils.FormatSeconds(mstat.estimated_time))
3178 max_time = mstat.estimated_time
3179 else:
3180 rem_time = "no time estimate"
3181 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3182 (disks[i].iv_name, mstat.sync_percent, rem_time))
3184 # if we're done but degraded, let's do a few small retries, to
3185 # make sure we see a stable and not transient situation; therefore
3186 # we force restart of the loop
3187 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3188 logging.info("Degraded disks found, %d retries left", degr_retries)
3196 time.sleep(min(60, max_time))
3199 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3200 return not cumul_degraded
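# Usage sketch (not part of the original module): callers typically wait for
# a full resync after creating or replacing disks, e.g.:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Some disks of the instance are degraded!")
#
# With oneshot=True the state is polled once instead of looping until the
# sync completes.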
3203 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3204 """Check that mirrors are not degraded.
3206 The ldisk parameter, if True, will change the test from the
3207 is_degraded attribute (which represents overall non-ok status for
3208 the device(s)) to the ldisk (representing the local storage status).
3211 lu.cfg.SetDiskID(dev, node)
3213 result = True
3215 if on_primary or dev.AssembleOnSecondary():
3216 rstats = lu.rpc.call_blockdev_find(node, dev)
3217 msg = rstats.fail_msg
3219 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3221 elif not rstats.payload:
3222 lu.LogWarning("Can't find disk on node %s", node)
3226 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3228 result = result and not rstats.payload.is_degraded
3230 if dev.children:
3231 for child in dev.children:
3232 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3234 return result
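# Usage sketch (not part of the original module): callers check each disk of
# an instance on one node, e.g. before a failover/migration; ldisk=True
# restricts the check to the local storage status:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, dev, node, False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded" % dev.iv_name)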
3237 class LUOobCommand(NoHooksLU):
3238 """Logical unit for OOB handling.
3243 ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
3244 ("timeout", constants.OOB_TIMEOUT, ht.TInt),
3248 def CheckPrereq(self):
3249 """Check prerequisites.
3251 This checks:
3252 - the node exists in the configuration
3253 - OOB is supported
3255 Any errors are signaled by raising errors.OpPrereqError.
3258 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3259 node = self.cfg.GetNodeInfo(self.op.node_name)
3262 raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3264 self.oob_program = self.cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
3266 if not self.oob_program:
3267 raise errors.OpPrereqError("OOB is not supported for node %s" %
3272 def ExpandNames(self):
3273 """Gather locks we need.
3276 node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3277 self.needed_locks = {
3278 locking.LEVEL_NODE: [node_name],
3279 }
3281 def Exec(self, feedback_fn):
3282 """Execute OOB and return result if we expect any.
3285 master_node = self.cfg.GetMasterNode()
3286 node = self.node
3288 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3289 self.op.command, self.oob_program, self.op.node_name)
3290 result = self.rpc.call_run_oob(master_node, self.oob_program,
3291 self.op.command, self.op.node_name,
3292 self.op.timeout)
3294 result.Raise("An error occurred on execution of OOB helper")
3296 self._CheckPayload(result)
3298 if self.op.command == constants.OOB_HEALTH:
3299 # For health we should log important events
3300 for item, status in result.payload:
3301 if status in [constants.OOB_STATUS_WARNING,
3302 constants.OOB_STATUS_CRITICAL]:
3303 logging.warning("On node '%s' item '%s' has status '%s'",
3304 self.op.node_name, item, status)
3306 if self.op.command == constants.OOB_POWER_ON:
3307 node.powered = True
3308 elif self.op.command == constants.OOB_POWER_OFF:
3309 node.powered = False
3310 elif self.op.command == constants.OOB_POWER_STATUS:
3311 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3312 if powered != self.node.powered:
3313 logging.warning(("Recorded power state (%s) of node '%s' does not match"
3314 " actual power state (%s)"), node.powered,
3315 self.op.node_name, powered)
3317 self.cfg.Update(node, feedback_fn)
3319 return result.payload
3321 def _CheckPayload(self, result):
3322 """Checks if the payload is valid.
3324 @param result: RPC result
3325 @raises errors.OpExecError: If payload is not valid
3328 errs = []
3329 if self.op.command == constants.OOB_HEALTH:
3330 if not isinstance(result.payload, list):
3331 errs.append("command 'health' is expected to return a list but got %s" %
3332 type(result.payload))
3333 for item, status in result.payload:
3334 if status not in constants.OOB_STATUSES:
3335 errs.append("health item '%s' has invalid status '%s'" %
3338 if self.op.command == constants.OOB_POWER_STATUS:
3339 if not isinstance(result.payload, dict):
3340 errs.append("power-status is expected to return a dict but got %s" %
3341 type(result.payload))
3343 if self.op.command in [
3344 constants.OOB_POWER_ON,
3345 constants.OOB_POWER_OFF,
3346 constants.OOB_POWER_CYCLE,
3348 if result.payload is not None:
3349 errs.append("%s is expected to not return payload but got '%s'" %
3350 (self.op.command, result.payload))
3352 if errs:
3353 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3354 utils.CommaJoin(errs))
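# Illustrative summary (not part of the original module) of the payloads
# accepted by _CheckPayload above:
#
#   OOB_HEALTH             -> list of (item, status), status in OOB_STATUSES
#   OOB_POWER_STATUS       -> dict with the OOB_POWER_STATUS_POWERED key
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None)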
3358 class LUDiagnoseOS(NoHooksLU):
3359 """Logical unit for OS diagnose/query.
3364 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3368 _BLK = "blacklisted"
3370 _FIELDS_STATIC = utils.FieldSet()
3371 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3372 "parameters", "api_versions", _HID, _BLK)
3374 def CheckArguments(self):
3376 raise errors.OpPrereqError("Selective OS query not supported",
3379 _CheckOutputFields(static=self._FIELDS_STATIC,
3380 dynamic=self._FIELDS_DYNAMIC,
3381 selected=self.op.output_fields)
3383 def ExpandNames(self):
3384 # Lock all nodes, in shared mode
3385 # Temporary removal of locks, should be reverted later
3386 # TODO: reintroduce locks when they are lighter-weight
3387 self.needed_locks = {}
3388 #self.share_locks[locking.LEVEL_NODE] = 1
3389 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3391 @staticmethod
3392 def _DiagnoseByOS(rlist):
3393 """Remaps a per-node return list into a per-os per-node dictionary
3395 @param rlist: a map with node names as keys and OS objects as values
3398 @return: a dictionary with osnames as keys and as value another
3399 map, with nodes as keys and tuples of (path, status, diagnose,
3400 variants, parameters, api_versions) as values, eg::
3402 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3403 (/srv/..., False, "invalid api")],
3404 "node2": [(/srv/..., True, "", [], [])]}
3408 all_os = {}
3409 # we build here the list of nodes that didn't fail the RPC (at RPC
3410 # level), so that nodes with a non-responding node daemon don't
3411 # make all OSes invalid
3412 good_nodes = [node_name for node_name in rlist
3413 if not rlist[node_name].fail_msg]
3414 for node_name, nr in rlist.items():
3415 if nr.fail_msg or not nr.payload:
3416 continue
3417 for (name, path, status, diagnose, variants,
3418 params, api_versions) in nr.payload:
3419 if name not in all_os:
3420 # build a list of nodes for this os containing empty lists
3421 # for each node in node_list
3422 all_os[name] = {}
3423 for nname in good_nodes:
3424 all_os[name][nname] = []
3425 # convert params from [name, help] to (name, help)
3426 params = [tuple(v) for v in params]
3427 all_os[name][node_name].append((path, status, diagnose,
3428 variants, params, api_versions))
3430 return all_os
3431 def Exec(self, feedback_fn):
3432 """Compute the list of OSes.
3435 valid_nodes = self.cfg.GetOnlineNodeList()
3436 node_data = self.rpc.call_os_diagnose(valid_nodes)
3437 pol = self._DiagnoseByOS(node_data)
3438 output = []
3439 cluster = self.cfg.GetClusterInfo()
3441 for os_name in utils.NiceSort(pol.keys()):
3442 os_data = pol[os_name]
3443 row = []
3444 valid = True
3445 (variants, params, api_versions) = null_state = (set(), set(), set())
3446 for idx, osl in enumerate(os_data.values()):
3447 valid = bool(valid and osl and osl[0][1])
3448 if not valid:
3449 (variants, params, api_versions) = null_state
3450 break
3451 node_variants, node_params, node_api = osl[0][3:6]
3452 if idx == 0: # first entry
3453 variants = set(node_variants)
3454 params = set(node_params)
3455 api_versions = set(node_api)
3456 else: # keep consistency
3457 variants.intersection_update(node_variants)
3458 params.intersection_update(node_params)
3459 api_versions.intersection_update(node_api)
3461 is_hid = os_name in cluster.hidden_os
3462 is_blk = os_name in cluster.blacklisted_os
3463 if ((self._HID not in self.op.output_fields and is_hid) or
3464 (self._BLK not in self.op.output_fields and is_blk) or
3465 (self._VLD not in self.op.output_fields and not valid)):
3466 continue
3468 for field in self.op.output_fields:
3469 if field == "name":
3470 val = os_name
3471 elif field == self._VLD:
3472 val = valid
3473 elif field == "node_status":
3474 # this is just a copy of the dict
3475 val = {}
3476 for node_name, nos_list in os_data.items():
3477 val[node_name] = nos_list
3478 elif field == "variants":
3479 val = utils.NiceSort(list(variants))
3480 elif field == "parameters":
3481 val = utils.NiceSort(list(params))
3482 elif field == "api_versions":
3483 val = list(api_versions)
3484 elif field == self._HID:
3485 val = is_hid
3486 elif field == self._BLK:
3487 val = is_blk
3488 else:
3489 raise errors.ParameterError(field)
3490 row.append(val)
3491 output.append(row)
3493 return output
3496 class LURemoveNode(LogicalUnit):
3497 """Logical unit for removing a node.
3500 HPATH = "node-remove"
3501 HTYPE = constants.HTYPE_NODE
3502 _OP_PARAMS = [
3503 _PNodeName,
3504 ]
3506 def BuildHooksEnv(self):
3509 This doesn't run on the target node in the pre phase as a failed
3510 node would then be impossible to remove.
3514 "OP_TARGET": self.op.node_name,
3515 "NODE_NAME": self.op.node_name,
3517 all_nodes = self.cfg.GetNodeList()
3518 try:
3519 all_nodes.remove(self.op.node_name)
3520 except ValueError:
3521 logging.warning("Node %s which is about to be removed not found"
3522 " in the all nodes list", self.op.node_name)
3523 return env, all_nodes, all_nodes
3525 def CheckPrereq(self):
3526 """Check prerequisites.
3529 - the node exists in the configuration
3530 - it does not have primary or secondary instances
3531 - it's not the master
3533 Any errors are signaled by raising errors.OpPrereqError.
3536 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3537 node = self.cfg.GetNodeInfo(self.op.node_name)
3538 assert node is not None
3540 instance_list = self.cfg.GetInstanceList()
3542 masternode = self.cfg.GetMasterNode()
3543 if node.name == masternode:
3544 raise errors.OpPrereqError("Node is the master node,"
3545 " you need to failover first.",
3548 for instance_name in instance_list:
3549 instance = self.cfg.GetInstanceInfo(instance_name)
3550 if node.name in instance.all_nodes:
3551 raise errors.OpPrereqError("Instance %s is still running on the node,"
3552 " please remove first." % instance_name,
3554 self.op.node_name = node.name
3555 self.node = node
3557 def Exec(self, feedback_fn):
3558 """Removes the node from the cluster.
3562 logging.info("Stopping the node daemon and removing configs from node %s",
3565 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3567 # Promote nodes to master candidate as needed
3568 _AdjustCandidatePool(self, exceptions=[node.name])
3569 self.context.RemoveNode(node.name)
3571 # Run post hooks on the node before it's removed
3572 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3573 try:
3574 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3575 except:
3576 # pylint: disable-msg=W0702
3577 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3579 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3580 msg = result.fail_msg
3582 self.LogWarning("Errors encountered on the remote node while leaving"
3583 " the cluster: %s", msg)
3585 # Remove node from our /etc/hosts
3586 if self.cfg.GetClusterInfo().modify_etc_hosts:
3587 master_node = self.cfg.GetMasterNode()
3588 result = self.rpc.call_etc_hosts_modify(master_node,
3589 constants.ETC_HOSTS_REMOVE,
3590 node.name, None)
3591 result.Raise("Can't update hosts file with new host data")
3592 _RedistributeAncillaryFiles(self)
3595 class _NodeQuery(_QueryBase):
3596 FIELDS = query.NODE_FIELDS
3598 def ExpandNames(self, lu):
3599 lu.needed_locks = {}
3600 lu.share_locks[locking.LEVEL_NODE] = 1
3602 if self.names:
3603 self.wanted = _GetWantedNodes(lu, self.names)
3604 else:
3605 self.wanted = locking.ALL_SET
3607 self.do_locking = (self.use_locking and
3608 query.NQ_LIVE in self.requested_data)
3610 if self.do_locking:
3611 # if we don't request only static fields, we need to lock the nodes
3612 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3614 def DeclareLocks(self, lu, level):
3615 pass
3617 def _GetQueryData(self, lu):
3618 """Computes the list of nodes and their attributes.
3621 all_info = lu.cfg.GetAllNodesInfo()
3623 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3625 # Gather data as requested
3626 if query.NQ_LIVE in self.requested_data:
3627 node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3628 lu.cfg.GetHypervisorType())
3629 live_data = dict((name, nresult.payload)
3630 for (name, nresult) in node_data.items()
3631 if not nresult.fail_msg and nresult.payload)
3632 else:
3633 live_data = None
3635 if query.NQ_INST in self.requested_data:
3636 node_to_primary = dict([(name, set()) for name in nodenames])
3637 node_to_secondary = dict([(name, set()) for name in nodenames])
3639 inst_data = lu.cfg.GetAllInstancesInfo()
3641 for inst in inst_data.values():
3642 if inst.primary_node in node_to_primary:
3643 node_to_primary[inst.primary_node].add(inst.name)
3644 for secnode in inst.secondary_nodes:
3645 if secnode in node_to_secondary:
3646 node_to_secondary[secnode].add(inst.name)
3648 node_to_primary = None
3649 node_to_secondary = None
3651 if query.NQ_GROUP in self.requested_data:
3652 groups = lu.cfg.GetAllNodeGroupsInfo()
3653 else:
3654 groups = {}
3656 return query.NodeQueryData([all_info[name] for name in nodenames],
3657 live_data, lu.cfg.GetMasterNode(),
3658 node_to_primary, node_to_secondary, groups)
3661 class LUQueryNodes(NoHooksLU):
3662 """Logical unit for querying nodes.
3665 # pylint: disable-msg=W0142
3668 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3669 ("use_locking", False, ht.TBool),
3673 def CheckArguments(self):
3674 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3675 self.op.use_locking)
3677 def ExpandNames(self):
3678 self.nq.ExpandNames(self)
3680 def Exec(self, feedback_fn):
3681 return self.nq.OldStyleQuery(self)
3684 class LUQueryNodeVolumes(NoHooksLU):
3685 """Logical unit for getting volumes on node(s).
3690 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3693 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3694 _FIELDS_STATIC = utils.FieldSet("node")
3696 def CheckArguments(self):
3697 _CheckOutputFields(static=self._FIELDS_STATIC,
3698 dynamic=self._FIELDS_DYNAMIC,
3699 selected=self.op.output_fields)
3701 def ExpandNames(self):
3702 self.needed_locks = {}
3703 self.share_locks[locking.LEVEL_NODE] = 1
3704 if not self.op.nodes:
3705 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3706 else:
3707 self.needed_locks[locking.LEVEL_NODE] = \
3708 _GetWantedNodes(self, self.op.nodes)
3710 def Exec(self, feedback_fn):
3711 """Computes the list of nodes and their attributes.
3714 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3715 volumes = self.rpc.call_node_volumes(nodenames)
3717 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3718 in self.cfg.GetInstanceList()]
3720 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3722 output = []
3723 for node in nodenames:
3724 nresult = volumes[node]
3725 if nresult.offline:
3726 continue
3727 msg = nresult.fail_msg
3728 if msg:
3729 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3730 continue
3732 node_vols = nresult.payload[:]
3733 node_vols.sort(key=lambda vol: vol['dev'])
3735 for vol in node_vols:
3736 node_output = []
3737 for field in self.op.output_fields:
3738 if field == "node":
3739 val = node
3740 elif field == "phys":
3741 val = vol['dev']
3742 elif field == "vg":
3743 val = vol['vg']
3744 elif field == "name":
3745 val = vol['name']
3746 elif field == "size":
3747 val = int(float(vol['size']))
3748 elif field == "instance":
3749 for inst in ilist:
3750 if node not in lv_by_node[inst]:
3751 continue
3752 if vol['name'] in lv_by_node[inst][node]:
3753 val = inst.name
3754 break
3755 else:
3756 val = '-'
3757 else:
3758 raise errors.ParameterError(field)
3759 node_output.append(str(val))
3761 output.append(node_output)
3763 return output
3766 class LUQueryNodeStorage(NoHooksLU):
3767 """Logical unit for getting information on storage units on node(s).
3770 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3773 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3774 ("storage_type", ht.NoDefault, _CheckStorageType),
3775 ("name", None, ht.TMaybeString),
3779 def CheckArguments(self):
3780 _CheckOutputFields(static=self._FIELDS_STATIC,
3781 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3782 selected=self.op.output_fields)
3784 def ExpandNames(self):
3785 self.needed_locks = {}
3786 self.share_locks[locking.LEVEL_NODE] = 1
3788 if self.op.nodes:
3789 self.needed_locks[locking.LEVEL_NODE] = \
3790 _GetWantedNodes(self, self.op.nodes)
3791 else:
3792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3794 def Exec(self, feedback_fn):
3795 """Computes the list of nodes and their attributes.
3798 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3800 # Always get name to sort by
3801 if constants.SF_NAME in self.op.output_fields:
3802 fields = self.op.output_fields[:]
3803 else:
3804 fields = [constants.SF_NAME] + self.op.output_fields
3806 # Never ask for node or type as it's only known to the LU
3807 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3808 while extra in fields:
3809 fields.remove(extra)
3811 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3812 name_idx = field_idx[constants.SF_NAME]
3814 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3815 data = self.rpc.call_storage_list(self.nodes,
3816 self.op.storage_type, st_args,
3817 self.op.name, fields)
3821 for node in utils.NiceSort(self.nodes):
3822 nresult = data[node]
3826 msg = nresult.fail_msg
3827 if msg:
3828 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3829 continue
3831 rows = dict([(row[name_idx], row) for row in nresult.payload])
3833 for name in utils.NiceSort(rows.keys()):
3834 row = rows[name]
3838 for field in self.op.output_fields:
3839 if field == constants.SF_NODE:
3840 val = node
3841 elif field == constants.SF_TYPE:
3842 val = self.op.storage_type
3843 elif field in field_idx:
3844 val = row[field_idx[field]]
3845 else:
3846 raise errors.ParameterError(field)
3855 class _InstanceQuery(_QueryBase):
3856 FIELDS = query.INSTANCE_FIELDS
3858 def ExpandNames(self, lu):
3859 lu.needed_locks = {}
3860 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3861 lu.share_locks[locking.LEVEL_NODE] = 1
3863 if self.names:
3864 self.wanted = _GetWantedInstances(lu, self.names)
3865 else:
3866 self.wanted = locking.ALL_SET
3868 self.do_locking = (self.use_locking and
3869 query.IQ_LIVE in self.requested_data)
3871 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3872 lu.needed_locks[locking.LEVEL_NODE] = []
3873 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3875 def DeclareLocks(self, lu, level):
3876 if level == locking.LEVEL_NODE and self.do_locking:
3877 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3879 def _GetQueryData(self, lu):
3880 """Computes the list of instances and their attributes.
3883 all_info = lu.cfg.GetAllInstancesInfo()
3885 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3887 instance_list = [all_info[name] for name in instance_names]
3888 nodes = frozenset([inst.primary_node for inst in instance_list])
3889 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3891 bad_nodes = []
3892 offline_nodes = []
3893 # Gather data as requested
3894 if query.IQ_LIVE in self.requested_data:
3895 live_data = {}
3896 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3897 for name in nodes:
3898 result = node_data[name]
3899 if result.offline:
3900 # offline nodes will be in both lists
3901 assert result.fail_msg
3902 offline_nodes.append(name)
3903 if result.fail_msg:
3904 bad_nodes.append(name)
3905 elif result.payload:
3906 live_data.update(result.payload)
3907 # else no instance is alive
3908 else:
3909 live_data = None
3911 if query.IQ_DISKUSAGE in self.requested_data:
3912 disk_usage = dict((inst.name,
3913 _ComputeDiskSize(inst.disk_template,
3914 [{"size": disk.size}
3915 for disk in inst.disks]))
3916 for inst in instance_list)
3917 else:
3918 disk_usage = None
3920 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3921 disk_usage, offline_nodes, bad_nodes,
3922 live_data)
3925 #: Query type implementations
3926 _QUERY_IMPL = {
3927 constants.QR_INSTANCE: _InstanceQuery,
3928 constants.QR_NODE: _NodeQuery,
3929 }
3932 def _GetQueryImplementation(name):
3933 """Returns the implemtnation for a query type.
3935 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3938 try:
3939 return _QUERY_IMPL[name]
3940 except KeyError:
3941 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3942 errors.ECODE_INVAL)
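# Editorial usage sketch (not part of the original module); QR_NODE and
# the "name" field are illustrative values only:
#
#   qcls = _GetQueryImplementation(constants.QR_NODE)
#   fdefs = qcls.FieldsQuery(["name"])  # field definitions, no locking
#
# Unknown resource names surface as OpPrereqError rather than KeyError.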
3945 class LUQuery(NoHooksLU):
3946 """Query for resources/items of a certain kind.
3949 # pylint: disable-msg=W0142
3951 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3952 ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3953 ("filter", None, ht.TOr(ht.TNone,
3954 ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3958 def CheckArguments(self):
3959 qcls = _GetQueryImplementation(self.op.what)
3960 names = qlang.ReadSimpleFilter("name", self.op.filter)
3962 self.impl = qcls(names, self.op.fields, False)
3964 def ExpandNames(self):
3965 self.impl.ExpandNames(self)
3967 def DeclareLocks(self, level):
3968 self.impl.DeclareLocks(self, level)
3970 def Exec(self, feedback_fn):
3971 return self.impl.NewStyleQuery(self)
3974 class LUQueryFields(NoHooksLU):
3975 """Query for resources/items of a certain kind.
3978 # pylint: disable-msg=W0142
3980 ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3981 ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3985 def CheckArguments(self):
3986 self.qcls = _GetQueryImplementation(self.op.what)
3988 def ExpandNames(self):
3989 self.needed_locks = {}
3991 def Exec(self, feedback_fn):
3992 return self.qcls.FieldsQuery(self.op.fields)
3995 class LUModifyNodeStorage(NoHooksLU):
3996 """Logical unit for modifying a storage volume on a node.
4001 ("storage_type", ht.NoDefault, _CheckStorageType),
4002 ("name", ht.NoDefault, ht.TNonEmptyString),
4003 ("changes", ht.NoDefault, ht.TDict),
4007 def CheckArguments(self):
4008 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4010 storage_type = self.op.storage_type
4012 try:
4013 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4014 except KeyError:
4015 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4016 " modified" % storage_type,
4019 diff = set(self.op.changes.keys()) - modifiable
4020 if diff:
4021 raise errors.OpPrereqError("The following fields can not be modified for"
4022 " storage units of type '%s': %r" %
4023 (storage_type, list(diff)),
4024 errors.ECODE_INVAL)
4026 def ExpandNames(self):
4027 self.needed_locks = {
4028 locking.LEVEL_NODE: self.op.node_name,
4031 def Exec(self, feedback_fn):
4032 """Computes the list of nodes and their attributes.
4035 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4036 result = self.rpc.call_storage_modify(self.op.node_name,
4037 self.op.storage_type, st_args,
4038 self.op.name, self.op.changes)
4039 result.Raise("Failed to modify storage unit '%s' on %s" %
4040 (self.op.name, self.op.node_name))
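# Editorial example (assumes a storage type whose only modifiable field
# is "allocatable", e.g. LVM physical volumes): changes ==
# {"allocatable": False} passes the CheckArguments validation above,
# while changes == {"size": 10} leaves a non-empty diff and is rejected
# with OpPrereqError.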
4043 class LUAddNode(LogicalUnit):
4044 """Logical unit for adding node to the cluster.
4048 HTYPE = constants.HTYPE_NODE
4051 ("primary_ip", None, ht.NoType),
4052 ("secondary_ip", None, ht.TMaybeString),
4053 ("readd", False, ht.TBool),
4054 ("group", None, ht.TMaybeString),
4055 ("master_capable", None, ht.TMaybeBool),
4056 ("vm_capable", None, ht.TMaybeBool),
4057 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4059 _NFLAGS = ["master_capable", "vm_capable"]
4061 def CheckArguments(self):
4062 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4063 # validate/normalize the node name
4064 self.hostname = netutils.GetHostname(name=self.op.node_name,
4065 family=self.primary_ip_family)
4066 self.op.node_name = self.hostname.name
4067 if self.op.readd and self.op.group:
4068 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4069 " being readded", errors.ECODE_INVAL)
4071 def BuildHooksEnv(self):
4074 This will run on all nodes before, and on all nodes + the new node after.
4078 "OP_TARGET": self.op.node_name,
4079 "NODE_NAME": self.op.node_name,
4080 "NODE_PIP": self.op.primary_ip,
4081 "NODE_SIP": self.op.secondary_ip,
4082 "MASTER_CAPABLE": str(self.op.master_capable),
4083 "VM_CAPABLE": str(self.op.vm_capable),
4085 nodes_0 = self.cfg.GetNodeList()
4086 nodes_1 = nodes_0 + [self.op.node_name, ]
4087 return env, nodes_0, nodes_1
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4093 - the new node is not already in the config
4095 - its parameters (single/dual homed) match the cluster
4097 Any errors are signaled by raising errors.OpPrereqError.
4101 hostname = self.hostname
4102 node = hostname.name
4103 primary_ip = self.op.primary_ip = hostname.ip
4104 if self.op.secondary_ip is None:
4105 if self.primary_ip_family == netutils.IP6Address.family:
4106 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4107 " IPv4 address must be given as secondary",
4109 self.op.secondary_ip = primary_ip
4111 secondary_ip = self.op.secondary_ip
4112 if not netutils.IP4Address.IsValid(secondary_ip):
4113 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4114 " address" % secondary_ip, errors.ECODE_INVAL)
4116 node_list = cfg.GetNodeList()
4117 if not self.op.readd and node in node_list:
4118 raise errors.OpPrereqError("Node %s is already in the configuration" %
4119 node, errors.ECODE_EXISTS)
4120 elif self.op.readd and node not in node_list:
4121 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4124 self.changed_primary_ip = False
4126 for existing_node_name in node_list:
4127 existing_node = cfg.GetNodeInfo(existing_node_name)
4129 if self.op.readd and node == existing_node_name:
4130 if existing_node.secondary_ip != secondary_ip:
4131 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4132 " address configuration as before",
4134 if existing_node.primary_ip != primary_ip:
4135 self.changed_primary_ip = True
4139 if (existing_node.primary_ip == primary_ip or
4140 existing_node.secondary_ip == primary_ip or
4141 existing_node.primary_ip == secondary_ip or
4142 existing_node.secondary_ip == secondary_ip):
4143 raise errors.OpPrereqError("New node ip address(es) conflict with"
4144 " existing node %s" % existing_node.name,
4145 errors.ECODE_NOTUNIQUE)
4147 # After this 'if' block, None is no longer a valid value for the
4148 # _capable op attributes
4149 if self.op.readd:
4150 old_node = self.cfg.GetNodeInfo(node)
4151 assert old_node is not None, "Can't retrieve locked node %s" % node
4152 for attr in self._NFLAGS:
4153 if getattr(self.op, attr) is None:
4154 setattr(self.op, attr, getattr(old_node, attr))
4155 else:
4156 for attr in self._NFLAGS:
4157 if getattr(self.op, attr) is None:
4158 setattr(self.op, attr, True)
4160 if self.op.readd and not self.op.vm_capable:
4161 pri, sec = cfg.GetNodeInstances(node)
4162 if pri or sec:
4163 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4164 " flag set to false, but it already holds"
4165 " instances" % node,
4168 # check that the type of the node (single versus dual homed) is the
4169 # same as for the master
4170 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4171 master_singlehomed = myself.secondary_ip == myself.primary_ip
4172 newbie_singlehomed = secondary_ip == primary_ip
4173 if master_singlehomed != newbie_singlehomed:
4174 if master_singlehomed:
4175 raise errors.OpPrereqError("The master has no secondary ip but the"
4176 " new node has one",
4179 raise errors.OpPrereqError("The master has a secondary ip but the"
4180 " new node doesn't have one",
4183 # checks reachability
4184 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4185 raise errors.OpPrereqError("Node not reachable by ping",
4186 errors.ECODE_ENVIRON)
4188 if not newbie_singlehomed:
4189 # check reachability from my secondary ip to newbie's secondary ip
4190 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4191 source=myself.secondary_ip):
4192 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4193 " based ping to node daemon port",
4194 errors.ECODE_ENVIRON)
4201 if self.op.master_capable:
4202 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4203 else:
4204 self.master_candidate = False
4206 if self.op.readd:
4207 self.new_node = old_node
4208 else:
4209 node_group = cfg.LookupNodeGroup(self.op.group)
4210 self.new_node = objects.Node(name=node,
4211 primary_ip=primary_ip,
4212 secondary_ip=secondary_ip,
4213 master_candidate=self.master_candidate,
4214 offline=False, drained=False,
4217 if self.op.ndparams:
4218 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4220 def Exec(self, feedback_fn):
4221 """Adds the new node to the cluster.
4224 new_node = self.new_node
4225 node = new_node.name
4227 # We are adding a new node, so we assume it is powered
4228 new_node.powered = True
4230 # for re-adds, reset the offline/drained/master-candidate flags;
4231 # we need to reset here, otherwise offline would prevent RPC calls
4232 # later in the procedure; this also means that if the re-add
4233 # fails, we are left with a non-offlined, broken node
4234 if self.op.readd:
4235 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4236 self.LogInfo("Readding a node, the offline/drained flags were reset")
4237 # if we demote the node, we do cleanup later in the procedure
4238 new_node.master_candidate = self.master_candidate
4239 if self.changed_primary_ip:
4240 new_node.primary_ip = self.op.primary_ip
4242 # copy the master/vm_capable flags
4243 for attr in self._NFLAGS:
4244 setattr(new_node, attr, getattr(self.op, attr))
4246 # notify the user about any possible mc promotion
4247 if new_node.master_candidate:
4248 self.LogInfo("Node will be a master candidate")
4250 if self.op.ndparams:
4251 new_node.ndparams = self.op.ndparams
4253 # check connectivity
4254 result = self.rpc.call_version([node])[node]
4255 result.Raise("Can't get version information from node %s" % node)
4256 if constants.PROTOCOL_VERSION == result.payload:
4257 logging.info("Communication to node %s fine, sw version %s match",
4258 node, result.payload)
4259 else:
4260 raise errors.OpExecError("Version mismatch master version %s,"
4261 " node version %s" %
4262 (constants.PROTOCOL_VERSION, result.payload))
4264 # Add node to our /etc/hosts, and add key to known_hosts
4265 if self.cfg.GetClusterInfo().modify_etc_hosts:
4266 master_node = self.cfg.GetMasterNode()
4267 result = self.rpc.call_etc_hosts_modify(master_node,
4268 constants.ETC_HOSTS_ADD,
4271 result.Raise("Can't update hosts file with new host data")
4273 if new_node.secondary_ip != new_node.primary_ip:
4274 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4277 node_verify_list = [self.cfg.GetMasterNode()]
4278 node_verify_param = {
4279 constants.NV_NODELIST: [node],
4280 # TODO: do a node-net-test as well?
4283 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4284 self.cfg.GetClusterName())
4285 for verifier in node_verify_list:
4286 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4287 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4288 if nl_payload:
4289 for failed in nl_payload:
4290 feedback_fn("ssh/hostname verification failed"
4291 " (checking from %s): %s" %
4292 (verifier, nl_payload[failed]))
4293 raise errors.OpExecError("ssh/hostname verification failed.")
4295 if self.op.readd:
4296 _RedistributeAncillaryFiles(self)
4297 self.context.ReaddNode(new_node)
4298 # make sure we redistribute the config
4299 self.cfg.Update(new_node, feedback_fn)
4300 # and make sure the new node will not have old files around
4301 if not new_node.master_candidate:
4302 result = self.rpc.call_node_demote_from_mc(new_node.name)
4303 msg = result.fail_msg
4304 if msg:
4305 self.LogWarning("Node failed to demote itself from master"
4306 " candidate status: %s" % msg)
4307 else:
4308 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4309 additional_vm=self.op.vm_capable)
4310 self.context.AddNode(new_node, self.proc.GetECId())
4313 class LUSetNodeParams(LogicalUnit):
4314 """Modifies the parameters of a node.
4316 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4317 to the node role (as _ROLE_*)
4318 @cvar _R2F: a dictionary from node role to tuples of flags
4319 @cvar _FLAGS: a list of attribute names corresponding to the flags
4322 HPATH = "node-modify"
4323 HTYPE = constants.HTYPE_NODE
4326 ("master_candidate", None, ht.TMaybeBool),
4327 ("offline", None, ht.TMaybeBool),
4328 ("drained", None, ht.TMaybeBool),
4329 ("auto_promote", False, ht.TBool),
4330 ("master_capable", None, ht.TMaybeBool),
4331 ("vm_capable", None, ht.TMaybeBool),
4332 ("secondary_ip", None, ht.TMaybeString),
4333 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4337 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4338 _F2R = {
4339 (True, False, False): _ROLE_CANDIDATE,
4340 (False, True, False): _ROLE_DRAINED,
4341 (False, False, True): _ROLE_OFFLINE,
4342 (False, False, False): _ROLE_REGULAR,
4343 }
4344 _R2F = dict((v, k) for k, v in _F2R.items())
4345 _FLAGS = ["master_candidate", "drained", "offline"]
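# Editorial example: a node with flags (master_candidate=False,
# drained=True, offline=False) maps via _F2R to _ROLE_DRAINED, and
# _R2F[_ROLE_DRAINED] recovers the flag tuple (False, True, False).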
4347 def CheckArguments(self):
4348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4349 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4350 self.op.master_capable, self.op.vm_capable,
4351 self.op.secondary_ip, self.op.ndparams]
4352 if all_mods.count(None) == len(all_mods):
4353 raise errors.OpPrereqError("Please pass at least one modification",
4355 if all_mods.count(True) > 1:
4356 raise errors.OpPrereqError("Can't set the node into more than one"
4357 " state at the same time",
4360 # Boolean value that tells us whether we might be demoting from MC
4361 self.might_demote = (self.op.master_candidate == False or
4362 self.op.offline == True or
4363 self.op.drained == True or
4364 self.op.master_capable == False)
4366 if self.op.secondary_ip:
4367 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4368 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4369 " address" % self.op.secondary_ip,
4372 self.lock_all = self.op.auto_promote and self.might_demote
4373 self.lock_instances = self.op.secondary_ip is not None
4375 def ExpandNames(self):
4376 if self.lock_all:
4377 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4378 else:
4379 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4381 if self.lock_instances:
4382 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4384 def DeclareLocks(self, level):
4385 # If we have locked all instances, before waiting to lock nodes, release
4386 # all the ones living on nodes unrelated to the current operation.
4387 if level == locking.LEVEL_NODE and self.lock_instances:
4388 instances_release = []
4389 instances_keep = []
4390 self.affected_instances = []
4391 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4392 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4393 instance = self.context.cfg.GetInstanceInfo(instance_name)
4394 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4395 if i_mirrored and self.op.node_name in instance.all_nodes:
4396 instances_keep.append(instance_name)
4397 self.affected_instances.append(instance)
4398 else:
4399 instances_release.append(instance_name)
4400 if instances_release:
4401 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4402 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4404 def BuildHooksEnv(self):
4407 This runs on the master node.
4411 "OP_TARGET": self.op.node_name,
4412 "MASTER_CANDIDATE": str(self.op.master_candidate),
4413 "OFFLINE": str(self.op.offline),
4414 "DRAINED": str(self.op.drained),
4415 "MASTER_CAPABLE": str(self.op.master_capable),
4416 "VM_CAPABLE": str(self.op.vm_capable),
4418 nl = [self.cfg.GetMasterNode(),
4419 self.op.node_name]
4420 return env, nl, nl
4422 def CheckPrereq(self):
4423 """Check prerequisites.
4425 This only checks the instance list against the existing names.
4428 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4430 if (self.op.master_candidate is not None or
4431 self.op.drained is not None or
4432 self.op.offline is not None):
4433 # we can't change the master's node flags
4434 if self.op.node_name == self.cfg.GetMasterNode():
4435 raise errors.OpPrereqError("The master role can be changed"
4436 " only via master-failover",
4439 if self.op.master_candidate and not node.master_capable:
4440 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4441 " it a master candidate" % node.name,
4444 if self.op.vm_capable == False:
4445 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4446 if ipri or isec:
4447 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4448 " the vm_capable flag" % node.name,
4451 if node.master_candidate and self.might_demote and not self.lock_all:
4452 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4453 # check if after removing the current node, we're missing master
4455 (mc_remaining, mc_should, _) = \
4456 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4457 if mc_remaining < mc_should:
4458 raise errors.OpPrereqError("Not enough master candidates, please"
4459 " pass auto_promote to allow promotion",
4462 self.old_flags = old_flags = (node.master_candidate,
4463 node.drained, node.offline)
4464 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4465 self.old_role = old_role = self._F2R[old_flags]
4467 # Check for ineffective changes
4468 for attr in self._FLAGS:
4469 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4470 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4471 setattr(self.op, attr, None)
4473 # Past this point, any flag change to False means a transition
4474 # away from the respective state, as only real changes are kept
4476 # If we're being deofflined/drained, we'll MC ourself if needed
4477 if (self.op.drained == False or self.op.offline == False or
4478 (self.op.master_capable and not node.master_capable)):
4479 if _DecideSelfPromotion(self):
4480 self.op.master_candidate = True
4481 self.LogInfo("Auto-promoting node to master candidate")
4483 # If we're no longer master capable, we'll demote ourselves from MC
4484 if self.op.master_capable == False and node.master_candidate:
4485 self.LogInfo("Demoting from master candidate")
4486 self.op.master_candidate = False
4489 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4490 if self.op.master_candidate:
4491 new_role = self._ROLE_CANDIDATE
4492 elif self.op.drained:
4493 new_role = self._ROLE_DRAINED
4494 elif self.op.offline:
4495 new_role = self._ROLE_OFFLINE
4496 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4497 # False is still in new flags, which means we're un-setting (the
4498 # offline state, for example)
4499 new_role = self._ROLE_REGULAR
4500 else: # no new flags, nothing, keep old role
4501 new_role = old_role
4503 self.new_role = new_role
4505 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4506 # Trying to transition out of offline status
4507 result = self.rpc.call_version([node.name])[node.name]
4508 if result.fail_msg:
4509 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4510 " to report its version: %s" %
4511 (node.name, result.fail_msg),
4514 self.LogWarning("Transitioning node from offline to online state"
4515 " without using re-add. Please make sure the node"
4518 if self.op.secondary_ip:
4519 # Ok even without locking, because this can't be changed by any LU
4520 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4521 master_singlehomed = master.secondary_ip == master.primary_ip
4522 if master_singlehomed and self.op.secondary_ip:
4523 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4524 " homed cluster", errors.ECODE_INVAL)
4526 if node.offline:
4527 if self.affected_instances:
4528 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4529 " node has instances (%s) configured"
4530 " to use it" % self.affected_instances)
4531 else:
4532 # On online nodes, check that no instances are running, and that
4533 # the node has the new ip and we can reach it.
4534 for instance in self.affected_instances:
4535 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4537 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4538 if master.name != node.name:
4539 # check reachability from master secondary ip to new secondary ip
4540 if not netutils.TcpPing(self.op.secondary_ip,
4541 constants.DEFAULT_NODED_PORT,
4542 source=master.secondary_ip):
4543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4544 " based ping to node daemon port",
4545 errors.ECODE_ENVIRON)
4547 if self.op.ndparams:
4548 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4549 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4550 self.new_ndparams = new_ndparams
4552 def Exec(self, feedback_fn):
4557 old_role = self.old_role
4558 new_role = self.new_role
4560 node = self.node
4561 result = []
4562 if self.op.ndparams:
4563 node.ndparams = self.new_ndparams
4565 for attr in ["master_capable", "vm_capable"]:
4566 val = getattr(self.op, attr)
4567 if val is not None:
4568 setattr(node, attr, val)
4569 result.append((attr, str(val)))
4571 if new_role != old_role:
4572 # Tell the node to demote itself, if no longer MC and not offline
4573 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4574 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4575 if msg:
4576 self.LogWarning("Node failed to demote itself: %s", msg)
4578 new_flags = self._R2F[new_role]
4579 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4580 if of != nf:
4581 result.append((desc, str(nf)))
4582 (node.master_candidate, node.drained, node.offline) = new_flags
4584 # we locked all nodes, we adjust the CP before updating this node
4585 if self.lock_all:
4586 _AdjustCandidatePool(self, [node.name])
4588 if self.op.secondary_ip:
4589 node.secondary_ip = self.op.secondary_ip
4590 result.append(("secondary_ip", self.op.secondary_ip))
4592 # this will trigger configuration file update, if needed
4593 self.cfg.Update(node, feedback_fn)
4595 # this will trigger job queue propagation or cleanup if the mc
4596 # flag changed
4597 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4598 self.context.ReaddNode(node)
4600 return result
4603 class LUPowercycleNode(NoHooksLU):
4604 """Powercycles a node.
4613 def CheckArguments(self):
4614 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4615 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4616 raise errors.OpPrereqError("The node is the master and the force"
4617 " parameter was not set",
4620 def ExpandNames(self):
4621 """Locking for PowercycleNode.
4623 This is a last-resort option and shouldn't block on other
4624 jobs. Therefore, we grab no locks.
4627 self.needed_locks = {}
4629 def Exec(self, feedback_fn):
4633 result = self.rpc.call_node_powercycle(self.op.node_name,
4634 self.cfg.GetHypervisorType())
4635 result.Raise("Failed to schedule the reboot")
4636 return result.payload
4639 class LUQueryClusterInfo(NoHooksLU):
4640 """Query cluster configuration.
4645 def ExpandNames(self):
4646 self.needed_locks = {}
4648 def Exec(self, feedback_fn):
4649 """Return cluster config.
4652 cluster = self.cfg.GetClusterInfo()
4654 os_hvp = {}
4655 # Filter just for enabled hypervisors
4656 for os_name, hv_dict in cluster.os_hvp.items():
4657 os_hvp[os_name] = {}
4658 for hv_name, hv_params in hv_dict.items():
4659 if hv_name in cluster.enabled_hypervisors:
4660 os_hvp[os_name][hv_name] = hv_params
4662 # Convert ip_family to ip_version
4663 primary_ip_version = constants.IP4_VERSION
4664 if cluster.primary_ip_family == netutils.IP6Address.family:
4665 primary_ip_version = constants.IP6_VERSION
4668 "software_version": constants.RELEASE_VERSION,
4669 "protocol_version": constants.PROTOCOL_VERSION,
4670 "config_version": constants.CONFIG_VERSION,
4671 "os_api_version": max(constants.OS_API_VERSIONS),
4672 "export_version": constants.EXPORT_VERSION,
4673 "architecture": (platform.architecture()[0], platform.machine()),
4674 "name": cluster.cluster_name,
4675 "master": cluster.master_node,
4676 "default_hypervisor": cluster.enabled_hypervisors[0],
4677 "enabled_hypervisors": cluster.enabled_hypervisors,
4678 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4679 for hypervisor_name in cluster.enabled_hypervisors]),
4681 "beparams": cluster.beparams,
4682 "osparams": cluster.osparams,
4683 "nicparams": cluster.nicparams,
4684 "candidate_pool_size": cluster.candidate_pool_size,
4685 "master_netdev": cluster.master_netdev,
4686 "volume_group_name": cluster.volume_group_name,
4687 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4688 "file_storage_dir": cluster.file_storage_dir,
4689 "maintain_node_health": cluster.maintain_node_health,
4690 "ctime": cluster.ctime,
4691 "mtime": cluster.mtime,
4692 "uuid": cluster.uuid,
4693 "tags": list(cluster.GetTags()),
4694 "uid_pool": cluster.uid_pool,
4695 "default_iallocator": cluster.default_iallocator,
4696 "reserved_lvs": cluster.reserved_lvs,
4697 "primary_ip_version": primary_ip_version,
4698 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4704 class LUQueryConfigValues(NoHooksLU):
4705 """Return configuration values.
4708 _OP_PARAMS = [_POutputFields]
4710 _FIELDS_DYNAMIC = utils.FieldSet()
4711 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4712 "watcher_pause", "volume_group_name")
4714 def CheckArguments(self):
4715 _CheckOutputFields(static=self._FIELDS_STATIC,
4716 dynamic=self._FIELDS_DYNAMIC,
4717 selected=self.op.output_fields)
4719 def ExpandNames(self):
4720 self.needed_locks = {}
4722 def Exec(self, feedback_fn):
4723 """Dump a representation of the cluster config to the standard output.
4726 values = []
4727 for field in self.op.output_fields:
4728 if field == "cluster_name":
4729 entry = self.cfg.GetClusterName()
4730 elif field == "master_node":
4731 entry = self.cfg.GetMasterNode()
4732 elif field == "drain_flag":
4733 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4734 elif field == "watcher_pause":
4735 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4736 elif field == "volume_group_name":
4737 entry = self.cfg.GetVGName()
4739 raise errors.ParameterError(field)
4740 values.append(entry)
4742 return values
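# Editorial example (hypothetical cluster): output_fields ==
# ["cluster_name", "master_node"] would return e.g.
# ["cluster1.example.com", "node1.example.com"], in the requested order.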
4744 class LUActivateInstanceDisks(NoHooksLU):
4745 """Bring up an instance's disks.
4750 ("ignore_size", False, ht.TBool),
4754 def ExpandNames(self):
4755 self._ExpandAndLockInstance()
4756 self.needed_locks[locking.LEVEL_NODE] = []
4757 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4759 def DeclareLocks(self, level):
4760 if level == locking.LEVEL_NODE:
4761 self._LockInstancesNodes()
4763 def CheckPrereq(self):
4764 """Check prerequisites.
4766 This checks that the instance is in the cluster.
4769 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4770 assert self.instance is not None, \
4771 "Cannot retrieve locked instance %s" % self.op.instance_name
4772 _CheckNodeOnline(self, self.instance.primary_node)
4774 def Exec(self, feedback_fn):
4775 """Activate the disks.
4778 disks_ok, disks_info = \
4779 _AssembleInstanceDisks(self, self.instance,
4780 ignore_size=self.op.ignore_size)
4781 if not disks_ok:
4782 raise errors.OpExecError("Cannot activate block devices")
4784 return disks_info
4787 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4789 """Prepare the block devices for an instance.
4791 This sets up the block devices on all nodes.
4793 @type lu: L{LogicalUnit}
4794 @param lu: the logical unit on whose behalf we execute
4795 @type instance: L{objects.Instance}
4796 @param instance: the instance for whose disks we assemble
4797 @type disks: list of L{objects.Disk} or None
4798 @param disks: which disks to assemble (or all, if None)
4799 @type ignore_secondaries: boolean
4800 @param ignore_secondaries: if true, errors on secondary nodes
4801 won't result in an error return from the function
4802 @type ignore_size: boolean
4803 @param ignore_size: if true, the current known size of the disk
4804 will not be used during the disk activation, useful for cases
4805 when the size is wrong
4806 @return: False if the operation failed, otherwise a list of
4807 (host, instance_visible_name, node_visible_name)
4808 with the mapping from node devices to instance devices
4811 device_info = []
4812 disks_ok = True
4813 iname = instance.name
4814 disks = _ExpandCheckDisks(instance, disks)
4816 # With the two passes mechanism we try to reduce the window of
4817 # opportunity for the race condition of switching DRBD to primary
4818 # before handshaking occurred, but we do not eliminate it
4820 # The proper fix would be to wait (with some limits) until the
4821 # connection has been made and drbd transitions from WFConnection
4822 # into any other network-connected state (Connected, SyncTarget,
4825 # 1st pass, assemble on all nodes in secondary mode
4826 for inst_disk in disks:
4827 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4828 if ignore_size:
4829 node_disk = node_disk.Copy()
4830 node_disk.UnsetSize()
4831 lu.cfg.SetDiskID(node_disk, node)
4832 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4833 msg = result.fail_msg
4834 if msg:
4835 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4836 " (is_primary=False, pass=1): %s",
4837 inst_disk.iv_name, node, msg)
4838 if not ignore_secondaries:
4839 disks_ok = False
4841 # FIXME: race condition on drbd migration to primary
4843 # 2nd pass, do only the primary node
4844 for inst_disk in disks:
4847 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4848 if node != instance.primary_node:
4849 continue
4850 if ignore_size:
4851 node_disk = node_disk.Copy()
4852 node_disk.UnsetSize()
4853 lu.cfg.SetDiskID(node_disk, node)
4854 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4855 msg = result.fail_msg
4856 if msg:
4857 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4858 " (is_primary=True, pass=2): %s",
4859 inst_disk.iv_name, node, msg)
4860 disks_ok = False
4861 else:
4862 dev_path = result.payload
4864 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4866 # leave the disks configured for the primary node
4867 # this is a workaround that would be fixed better by
4868 # improving the logical/physical id handling
4869 for disk in disks:
4870 lu.cfg.SetDiskID(disk, instance.primary_node)
4872 return disks_ok, device_info
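# Editorial usage sketch (not part of the original module): callers are
# expected to check the boolean result and roll back on failure, e.g.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("disk %s on %s is %s" % (iv_name, node, dev_path))
#
# where feedback_fn stands in for any reporting callback.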
4875 def _StartInstanceDisks(lu, instance, force):
4876 """Start the disks of an instance.
4879 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4880 ignore_secondaries=force)
4881 if not disks_ok:
4882 _ShutdownInstanceDisks(lu, instance)
4883 if force is not None and not force:
4884 lu.proc.LogWarning("", hint="If the message above refers to a"
4886 " you can retry the operation using '--force'.")
4887 raise errors.OpExecError("Disk consistency error")
4890 class LUDeactivateInstanceDisks(NoHooksLU):
4891 """Shutdown an instance's disks.
4899 def ExpandNames(self):
4900 self._ExpandAndLockInstance()
4901 self.needed_locks[locking.LEVEL_NODE] = []
4902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4904 def DeclareLocks(self, level):
4905 if level == locking.LEVEL_NODE:
4906 self._LockInstancesNodes()
4908 def CheckPrereq(self):
4909 """Check prerequisites.
4911 This checks that the instance is in the cluster.
4914 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4915 assert self.instance is not None, \
4916 "Cannot retrieve locked instance %s" % self.op.instance_name
4918 def Exec(self, feedback_fn):
4919 """Deactivate the disks
4922 instance = self.instance
4923 _SafeShutdownInstanceDisks(self, instance)
4926 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4927 """Shutdown block devices of an instance.
4929 This function checks that the instance is not running before calling
4930 _ShutdownInstanceDisks.
4933 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4934 _ShutdownInstanceDisks(lu, instance, disks=disks)
4937 def _ExpandCheckDisks(instance, disks):
4938 """Return the instance disks selected by the disks list
4940 @type disks: list of L{objects.Disk} or None
4941 @param disks: selected disks
4942 @rtype: list of L{objects.Disk}
4943 @return: selected instance disks to act on
4946 if disks is None:
4947 return instance.disks
4949 if not set(disks).issubset(instance.disks):
4950 raise errors.ProgrammerError("Can only act on disks belonging to the"
4951 " owned instance")
4953 return disks
4955 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4956 """Shutdown block devices of an instance.
4958 This does the shutdown on all nodes of the instance.
4960 If ignore_primary is false, errors on the primary node are treated as
4961 fatal and cause the function to report failure.
4965 disks = _ExpandCheckDisks(instance, disks)
4966 all_result = True
4967 for disk in disks:
4968 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4969 lu.cfg.SetDiskID(top_disk, node)
4970 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4971 msg = result.fail_msg
4972 if msg:
4973 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4974 disk.iv_name, node, msg)
4975 if not ignore_primary or node != instance.primary_node:
4976 all_result = False
4978 return all_result
4980 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4981 """Checks if a node has enough free memory.
4983 This function checks if a given node has the needed amount of free
4984 memory. In case the node has less memory or we cannot get the
4985 information from the node, this function raises an OpPrereqError
4988 @type lu: C{LogicalUnit}
4989 @param lu: a logical unit from which we get configuration data
4991 @param node: the node to check
4992 @type reason: C{str}
4993 @param reason: string to use in the error message
4994 @type requested: C{int}
4995 @param requested: the amount of memory in MiB to check for
4996 @type hypervisor_name: C{str}
4997 @param hypervisor_name: the hypervisor to ask for memory stats
4998 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4999 we cannot check the node
5002 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5003 nodeinfo[node].Raise("Can't get data from node %s" % node,
5004 prereq=True, ecode=errors.ECODE_ENVIRON)
5005 free_mem = nodeinfo[node].payload.get('memory_free', None)
5006 if not isinstance(free_mem, int):
5007 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5008 " was '%s'" % (node, free_mem),
5009 errors.ECODE_ENVIRON)
5010 if requested > free_mem:
5011 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5012 " needed %s MiB, available %s MiB" %
5013 (node, reason, requested, free_mem),
5014 errors.ECODE_NORES)
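# Editorial example (hypothetical amount): requiring 1024 MiB free on an
# instance's primary node before starting it; OpPrereqError is raised if
# the node cannot provide or report that much memory.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        1024, instance.hypervisor)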
5017 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5018 """Checks if nodes have enough free disk space in the all VGs.
5020 This function checks if all given nodes have the needed amount of
5021 free disk. In case any node has less disk or we cannot get the
5022 information from the node, this function raises an OpPrereqError
5025 @type lu: C{LogicalUnit}
5026 @param lu: a logical unit from which we get configuration data
5027 @type nodenames: C{list}
5028 @param nodenames: the list of node names to check
5029 @type req_sizes: C{dict}
5030 @param req_sizes: the hash of vg and corresponding amount of disk in
5032 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5033 or we cannot check the node
5036 if req_sizes is not None:
5037 for vg, req_size in req_sizes.iteritems():
5038 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
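# Editorial example (hypothetical volume groups and sizes): req_sizes ==
# {"xenvg": 10240, "data": 2048} checks every node in nodenames for
# 10 GiB free in "xenvg" and 2 GiB free in "data", issuing one
# _CheckNodesFreeDiskOnVG call per volume group.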
5041 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5042 """Checks if nodes have enough free disk space in the specified VG.
5044 This function checks if all given nodes have the needed amount of
5045 free disk. In case any node has less disk or we cannot get the
5046 information from the node, this function raises an OpPrereqError
5049 @type lu: C{LogicalUnit}
5050 @param lu: a logical unit from which we get configuration data
5051 @type nodenames: C{list}
5052 @param nodenames: the list of node names to check
5054 @param vg: the volume group to check
5055 @type requested: C{int}
5056 @param requested: the amount of disk in MiB to check for
5057 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5058 or we cannot check the node
5061 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5062 for node in nodenames:
5063 info = nodeinfo[node]
5064 info.Raise("Cannot get current information from node %s" % node,
5065 prereq=True, ecode=errors.ECODE_ENVIRON)
5066 vg_free = info.payload.get("vg_free", None)
5067 if not isinstance(vg_free, int):
5068 raise errors.OpPrereqError("Can't compute free disk space on node"
5069 " %s for vg %s, result was '%s'" %
5070 (node, vg, vg_free), errors.ECODE_ENVIRON)
5071 if requested > vg_free:
5072 raise errors.OpPrereqError("Not enough disk space on target node %s"
5073 " vg %s: required %d MiB, available %d MiB" %
5074 (node, vg, requested, vg_free),
5075 errors.ECODE_NORES)
5078 class LUStartupInstance(LogicalUnit):
5079 """Starts an instance.
5082 HPATH = "instance-start"
5083 HTYPE = constants.HTYPE_INSTANCE
5087 _PIgnoreOfflineNodes,
5088 ("hvparams", ht.EmptyDict, ht.TDict),
5089 ("beparams", ht.EmptyDict, ht.TDict),
5093 def CheckArguments(self):
5095 if self.op.beparams:
5096 # fill the beparams dict
5097 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5099 def ExpandNames(self):
5100 self._ExpandAndLockInstance()
5102 def BuildHooksEnv(self):
5105 This runs on master, primary and secondary nodes of the instance.
5109 "FORCE": self.op.force,
5111 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5112 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5113 return env, nl, nl
5115 def CheckPrereq(self):
5116 """Check prerequisites.
5118 This checks that the instance is in the cluster.
5121 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5122 assert self.instance is not None, \
5123 "Cannot retrieve locked instance %s" % self.op.instance_name
5126 if self.op.hvparams:
5127 # check hypervisor parameter syntax (locally)
5128 cluster = self.cfg.GetClusterInfo()
5129 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5130 filled_hvp = cluster.FillHV(instance)
5131 filled_hvp.update(self.op.hvparams)
5132 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5133 hv_type.CheckParameterSyntax(filled_hvp)
5134 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5136 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5138 if self.primary_offline and self.op.ignore_offline_nodes:
5139 self.proc.LogWarning("Ignoring offline primary node")
5141 if self.op.hvparams or self.op.beparams:
5142 self.proc.LogWarning("Overridden parameters are ignored")
5143 else:
5144 _CheckNodeOnline(self, instance.primary_node)
5146 bep = self.cfg.GetClusterInfo().FillBE(instance)
5148 # check bridges existence
5149 _CheckInstanceBridgesExist(self, instance)
5151 remote_info = self.rpc.call_instance_info(instance.primary_node,
5153 instance.hypervisor)
5154 remote_info.Raise("Error checking node %s" % instance.primary_node,
5155 prereq=True, ecode=errors.ECODE_ENVIRON)
5156 if not remote_info.payload: # not running already
5157 _CheckNodeFreeMemory(self, instance.primary_node,
5158 "starting instance %s" % instance.name,
5159 bep[constants.BE_MEMORY], instance.hypervisor)
5161 def Exec(self, feedback_fn):
5162 """Start the instance.
5165 instance = self.instance
5166 force = self.op.force
5168 self.cfg.MarkInstanceUp(instance.name)
5170 if self.primary_offline:
5171 assert self.op.ignore_offline_nodes
5172 self.proc.LogInfo("Primary node offline, marked instance as started")
5173 else:
5174 node_current = instance.primary_node
5176 _StartInstanceDisks(self, instance, force)
5178 result = self.rpc.call_instance_start(node_current, instance,
5179 self.op.hvparams, self.op.beparams)
5180 msg = result.fail_msg
5181 if msg:
5182 _ShutdownInstanceDisks(self, instance)
5183 raise errors.OpExecError("Could not start instance: %s" % msg)
5186 class LURebootInstance(LogicalUnit):
5187 """Reboot an instance.
5190 HPATH = "instance-reboot"
5191 HTYPE = constants.HTYPE_INSTANCE
5194 ("ignore_secondaries", False, ht.TBool),
5195 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
5200 def ExpandNames(self):
5201 self._ExpandAndLockInstance()
5203 def BuildHooksEnv(self):
5206 This runs on master, primary and secondary nodes of the instance.
5210 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5211 "REBOOT_TYPE": self.op.reboot_type,
5212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5214 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5215 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5216 return env, nl, nl
5218 def CheckPrereq(self):
5219 """Check prerequisites.
5221 This checks that the instance is in the cluster.
5224 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5225 assert self.instance is not None, \
5226 "Cannot retrieve locked instance %s" % self.op.instance_name
5228 _CheckNodeOnline(self, instance.primary_node)
5230 # check bridges existence
5231 _CheckInstanceBridgesExist(self, instance)
5233 def Exec(self, feedback_fn):
5234 """Reboot the instance.
5237 instance = self.instance
5238 ignore_secondaries = self.op.ignore_secondaries
5239 reboot_type = self.op.reboot_type
5241 node_current = instance.primary_node
5243 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5244 constants.INSTANCE_REBOOT_HARD]:
5245 for disk in instance.disks:
5246 self.cfg.SetDiskID(disk, node_current)
5247 result = self.rpc.call_instance_reboot(node_current, instance,
5248 reboot_type,
5249 self.op.shutdown_timeout)
5250 result.Raise("Could not reboot instance")
5251 else:
5252 result = self.rpc.call_instance_shutdown(node_current, instance,
5253 self.op.shutdown_timeout)
5254 result.Raise("Could not shutdown instance for full reboot")
5255 _ShutdownInstanceDisks(self, instance)
5256 _StartInstanceDisks(self, instance, ignore_secondaries)
5257 result = self.rpc.call_instance_start(node_current, instance, None, None)
5258 msg = result.fail_msg
5259 if msg:
5260 _ShutdownInstanceDisks(self, instance)
5261 raise errors.OpExecError("Could not start instance for"
5262 " full reboot: %s" % msg)
5264 self.cfg.MarkInstanceUp(instance.name)
5267 class LUShutdownInstance(LogicalUnit):
5268 """Shutdown an instance.
5271 HPATH = "instance-stop"
5272 HTYPE = constants.HTYPE_INSTANCE
5275 _PIgnoreOfflineNodes,
5276 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
5280 def ExpandNames(self):
5281 self._ExpandAndLockInstance()
5283 def BuildHooksEnv(self):
5286 This runs on master, primary and secondary nodes of the instance.
5289 env = _BuildInstanceHookEnvByObject(self, self.instance)
5290 env["TIMEOUT"] = self.op.timeout
5291 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5292 return env, nl, nl
5294 def CheckPrereq(self):
5295 """Check prerequisites.
5297 This checks that the instance is in the cluster.
5300 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5301 assert self.instance is not None, \
5302 "Cannot retrieve locked instance %s" % self.op.instance_name
5304 self.primary_offline = \
5305 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5307 if self.primary_offline and self.op.ignore_offline_nodes:
5308 self.proc.LogWarning("Ignoring offline primary node")
5309 else:
5310 _CheckNodeOnline(self, self.instance.primary_node)
5312 def Exec(self, feedback_fn):
5313 """Shutdown the instance.
5316 instance = self.instance
5317 node_current = instance.primary_node
5318 timeout = self.op.timeout
5320 self.cfg.MarkInstanceDown(instance.name)
5322 if self.primary_offline:
5323 assert self.op.ignore_offline_nodes
5324 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5325 else:
5326 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5327 msg = result.fail_msg
5328 if msg:
5329 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5331 _ShutdownInstanceDisks(self, instance)
5334 class LUReinstallInstance(LogicalUnit):
5335 """Reinstall an instance.
5338 HPATH = "instance-reinstall"
5339 HTYPE = constants.HTYPE_INSTANCE
5342 ("os_type", None, ht.TMaybeString),
5343 ("force_variant", False, ht.TBool),
5344 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5348 def ExpandNames(self):
5349 self._ExpandAndLockInstance()
5351 def BuildHooksEnv(self):
5354 This runs on master, primary and secondary nodes of the instance.
5357 env = _BuildInstanceHookEnvByObject(self, self.instance)
5358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5359 return env, nl, nl
5361 def CheckPrereq(self):
5362 """Check prerequisites.
5364 This checks that the instance is in the cluster and is not running.
5367 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5368 assert instance is not None, \
5369 "Cannot retrieve locked instance %s" % self.op.instance_name
5370 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5371 " offline, cannot reinstall")
5372 for node in instance.secondary_nodes:
5373 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5374 " cannot reinstall")
5376 if instance.disk_template == constants.DT_DISKLESS:
5377 raise errors.OpPrereqError("Instance '%s' has no disks" %
5378 self.op.instance_name,
5380 _CheckInstanceDown(self, instance, "cannot reinstall")
5382 if self.op.os_type is not None:
5384 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5385 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5386 instance_os = self.op.os_type
5387 else:
5388 instance_os = instance.os
5390 nodelist = list(instance.all_nodes)
5392 if self.op.osparams:
5393 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5394 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5395 self.os_inst = i_osdict # the new dict (without defaults)
5396 else:
5397 self.os_inst = {}
5399 self.instance = instance
5401 def Exec(self, feedback_fn):
5402 """Reinstall the instance.
5405 inst = self.instance
5407 if self.op.os_type is not None:
5408 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5409 inst.os = self.op.os_type
5410 # Write to configuration
5411 self.cfg.Update(inst, feedback_fn)
5413 _StartInstanceDisks(self, inst, None)
5415 feedback_fn("Running the instance OS create scripts...")
5416 # FIXME: pass debug option from opcode to backend
5417 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5418 self.op.debug_level,
5419 osparams=self.os_inst)
5420 result.Raise("Could not install OS for instance %s on node %s" %
5421 (inst.name, inst.primary_node))
5423 _ShutdownInstanceDisks(self, inst)
5426 class LURecreateInstanceDisks(LogicalUnit):
5427 """Recreate an instance's missing disks.
5430 HPATH = "instance-recreate-disks"
5431 HTYPE = constants.HTYPE_INSTANCE
5434 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5438 def ExpandNames(self):
5439 self._ExpandAndLockInstance()
5441 def BuildHooksEnv(self):
5444 This runs on master, primary and secondary nodes of the instance.
5447 env = _BuildInstanceHookEnvByObject(self, self.instance)
5448 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5449 return env, nl, nl
5451 def CheckPrereq(self):
5452 """Check prerequisites.
5454 This checks that the instance is in the cluster and is not running.
5457 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5458 assert instance is not None, \
5459 "Cannot retrieve locked instance %s" % self.op.instance_name
5460 _CheckNodeOnline(self, instance.primary_node)
5462 if instance.disk_template == constants.DT_DISKLESS:
5463 raise errors.OpPrereqError("Instance '%s' has no disks" %
5464 self.op.instance_name, errors.ECODE_INVAL)
5465 _CheckInstanceDown(self, instance, "cannot recreate disks")
5467 if not self.op.disks:
5468 self.op.disks = range(len(instance.disks))
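# Editorial note: an instance with three disks and no explicit "disks"
# argument therefore recreates indices [0, 1, 2].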
5470 for idx in self.op.disks:
5471 if idx >= len(instance.disks):
5472 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5475 self.instance = instance
5477 def Exec(self, feedback_fn):
5478 """Recreate the disks.
5481 to_skip = []
5482 for idx, _ in enumerate(self.instance.disks):
5483 if idx not in self.op.disks: # disk idx has not been passed in
5484 to_skip.append(idx)
5487 _CreateDisks(self, self.instance, to_skip=to_skip)
5490 class LURenameInstance(LogicalUnit):
5491 """Rename an instance.
5494 HPATH = "instance-rename"
5495 HTYPE = constants.HTYPE_INSTANCE
5498 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5499 ("ip_check", False, ht.TBool),
5500 ("name_check", True, ht.TBool),
5503 def CheckArguments(self):
5507 if self.op.ip_check and not self.op.name_check:
5508 # TODO: make the ip check more flexible and not depend on the name check
5509 raise errors.OpPrereqError("Cannot do ip check without a name check",
5512 def BuildHooksEnv(self):
5515 This runs on master, primary and secondary nodes of the instance.
5518 env = _BuildInstanceHookEnvByObject(self, self.instance)
5519 env["INSTANCE_NEW_NAME"] = self.op.new_name
5520 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5521 return env, nl, nl
5523 def CheckPrereq(self):
5524 """Check prerequisites.
5526 This checks that the instance is in the cluster and is not running.
5529 self.op.instance_name = _ExpandInstanceName(self.cfg,
5530 self.op.instance_name)
5531 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5532 assert instance is not None
5533 _CheckNodeOnline(self, instance.primary_node)
5534 _CheckInstanceDown(self, instance, "cannot rename")
5535 self.instance = instance
5537 new_name = self.op.new_name
5538 if self.op.name_check:
5539 hostname = netutils.GetHostname(name=new_name)
5540 new_name = self.op.new_name = hostname.name
5541 if (self.op.ip_check and
5542 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5543 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5544 (hostname.ip, new_name),
5545 errors.ECODE_NOTUNIQUE)
5547 instance_list = self.cfg.GetInstanceList()
5548 if new_name in instance_list and new_name != instance.name:
5549 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5550 new_name, errors.ECODE_EXISTS)
5552 def Exec(self, feedback_fn):
5553 """Reinstall the instance.
5556 inst = self.instance
5557 old_name = inst.name
5559 rename_file_storage = False
5560 if (inst.disk_template == constants.DT_FILE and
5561 self.op.new_name != inst.name):
5562 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5563 rename_file_storage = True
5565 self.cfg.RenameInstance(inst.name, self.op.new_name)
5566 # Change the instance lock. This is definitely safe while we hold the BGL
5567 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5568 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5570 # re-read the instance from the configuration after rename
5571 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5573 if rename_file_storage:
5574 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5575 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5576 old_file_storage_dir,
5577 new_file_storage_dir)
5578 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5579 " (but the instance has been renamed in Ganeti)" %
5580 (inst.primary_node, old_file_storage_dir,
5581 new_file_storage_dir))
5583 _StartInstanceDisks(self, inst, None)
5585 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5586 old_name, self.op.debug_level)
5587 msg = result.fail_msg
5588 if msg:
5589 msg = ("Could not run OS rename script for instance %s on node %s"
5590 " (but the instance has been renamed in Ganeti): %s" %
5591 (inst.name, inst.primary_node, msg))
5592 self.proc.LogWarning(msg)
5594 _ShutdownInstanceDisks(self, inst)
5596 return inst.name
5599 class LURemoveInstance(LogicalUnit):
5600 """Remove an instance.
5603 HPATH = "instance-remove"
5604 HTYPE = constants.HTYPE_INSTANCE
5607 ("ignore_failures", False, ht.TBool),
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5614 self.needed_locks[locking.LEVEL_NODE] = []
5615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5617 def DeclareLocks(self, level):
5618 if level == locking.LEVEL_NODE:
5619 self._LockInstancesNodes()
5621 def BuildHooksEnv(self):
5624 This runs on master, primary and secondary nodes of the instance.
5627 env = _BuildInstanceHookEnvByObject(self, self.instance)
5628 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5629 nl = [self.cfg.GetMasterNode()]
5630 nl_post = list(self.instance.all_nodes) + nl
5631 return env, nl, nl_post
5633 def CheckPrereq(self):
5634 """Check prerequisites.
5636 This checks that the instance is in the cluster.
5639 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5640 assert self.instance is not None, \
5641 "Cannot retrieve locked instance %s" % self.op.instance_name
5643 def Exec(self, feedback_fn):
5644 """Remove the instance.
5647 instance = self.instance
5648 logging.info("Shutting down instance %s on node %s",
5649 instance.name, instance.primary_node)
5651 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5652 self.op.shutdown_timeout)
5653 msg = result.fail_msg
5654 if msg:
5655 if self.op.ignore_failures:
5656 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5657 else:
5658 raise errors.OpExecError("Could not shutdown instance %s on"
5659 " node %s: %s" %
5660 (instance.name, instance.primary_node, msg))
5662 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5665 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5666 """Utility function to remove an instance.
5669 logging.info("Removing block devices for instance %s", instance.name)
5671 if not _RemoveDisks(lu, instance):
5672 if not ignore_failures:
5673 raise errors.OpExecError("Can't remove instance's disks")
5674 feedback_fn("Warning: can't remove instance's disks")
5676 logging.info("Removing instance %s out of cluster config", instance.name)
5678 lu.cfg.RemoveInstance(instance.name)
5680 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5681 "Instance lock removal conflict"
5683 # Remove lock for the instance
5684 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5687 class LUQueryInstances(NoHooksLU):
5688 """Logical unit for querying instances.
5691 # pylint: disable-msg=W0142
5694 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5695 ("use_locking", False, ht.TBool),
5699 def CheckArguments(self):
5700 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5701 self.op.use_locking)
5703 def ExpandNames(self):
5704 self.iq.ExpandNames(self)
5706 def DeclareLocks(self, level):
5707 self.iq.DeclareLocks(self, level)
5709 def Exec(self, feedback_fn):
5710 return self.iq.OldStyleQuery(self)
5713 class LUFailoverInstance(LogicalUnit):
5714 """Failover an instance.
5717 HPATH = "instance-failover"
5718 HTYPE = constants.HTYPE_INSTANCE
5721 ("ignore_consistency", False, ht.TBool),
5726 def ExpandNames(self):
5727 self._ExpandAndLockInstance()
5728 self.needed_locks[locking.LEVEL_NODE] = []
5729 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5731 def DeclareLocks(self, level):
5732 if level == locking.LEVEL_NODE:
5733 self._LockInstancesNodes()
5735 def BuildHooksEnv(self):
5738 This runs on master, primary and secondary nodes of the instance.
5741 instance = self.instance
5742 source_node = instance.primary_node
5743 target_node = instance.secondary_nodes[0]
5745 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5746 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5747 "OLD_PRIMARY": source_node,
5748 "OLD_SECONDARY": target_node,
5749 "NEW_PRIMARY": target_node,
5750 "NEW_SECONDARY": source_node,
5752 env.update(_BuildInstanceHookEnvByObject(self, instance))
5753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5755 nl_post.append(source_node)
5756 return env, nl, nl_post
5758 def CheckPrereq(self):
5759 """Check prerequisites.
5761 This checks that the instance is in the cluster.
5764 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5765 assert self.instance is not None, \
5766 "Cannot retrieve locked instance %s" % self.op.instance_name
5768 bep = self.cfg.GetClusterInfo().FillBE(instance)
5769 if instance.disk_template not in constants.DTS_NET_MIRROR:
5770 raise errors.OpPrereqError("Instance's disk layout is not"
5771 " network mirrored, cannot failover.",
5774 secondary_nodes = instance.secondary_nodes
5775 if not secondary_nodes:
5776 raise errors.ProgrammerError("no secondary node but using "
5777 "a mirrored disk template")
5779 target_node = secondary_nodes[0]
5780 _CheckNodeOnline(self, target_node)
5781 _CheckNodeNotDrained(self, target_node)
5782 if instance.admin_up:
5783 # check memory requirements on the secondary node
5784 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5785 instance.name, bep[constants.BE_MEMORY],
5786 instance.hypervisor)
5788 self.LogInfo("Not checking memory on the secondary node as"
5789 " instance will not be started")
5791 # check bridge existence
5792 _CheckInstanceBridgesExist(self, instance, node=target_node)
5794 def Exec(self, feedback_fn):
5795 """Failover an instance.
5797 The failover is done by shutting it down on its present node and
5798 starting it on the secondary.
5801 instance = self.instance
5802 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5804 source_node = instance.primary_node
5805 target_node = instance.secondary_nodes[0]
5807 if instance.admin_up:
5808 feedback_fn("* checking disk consistency between source and target")
5809 for dev in instance.disks:
5810 # for drbd, these are drbd over lvm
5811 if not _CheckDiskConsistency(self, dev, target_node, False):
5812 if not self.op.ignore_consistency:
5813 raise errors.OpExecError("Disk %s is degraded on target node,"
5814 " aborting failover." % dev.iv_name)
5816 feedback_fn("* not checking disk consistency as instance is not running")
5818 feedback_fn("* shutting down instance on source node")
5819 logging.info("Shutting down instance %s on node %s",
5820 instance.name, source_node)
5822 result = self.rpc.call_instance_shutdown(source_node, instance,
5823 self.op.shutdown_timeout)
5824 msg = result.fail_msg
5826 if self.op.ignore_consistency or primary_node.offline:
5827 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5828 " Proceeding anyway. Please make sure node"
5829 " %s is down. Error details: %s",
5830 instance.name, source_node, source_node, msg)
5832 raise errors.OpExecError("Could not shutdown instance %s on"
5834 (instance.name, source_node, msg))
5836 feedback_fn("* deactivating the instance's disks on source node")
5837 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5838 raise errors.OpExecError("Can't shut down the instance's disks.")
5840 instance.primary_node = target_node
5841 # distribute new instance config to the other nodes
5842 self.cfg.Update(instance, feedback_fn)
5844 # Only start the instance if it's marked as up
5845 if instance.admin_up:
5846 feedback_fn("* activating the instance's disks on target node")
5847 logging.info("Starting instance %s on node %s",
5848 instance.name, target_node)
5850 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5851 ignore_secondaries=True)
5853 _ShutdownInstanceDisks(self, instance)
5854 raise errors.OpExecError("Can't activate the instance's disks")
5856 feedback_fn("* starting the instance on the target node")
5857 result = self.rpc.call_instance_start(target_node, instance, None, None)
5858 msg = result.fail_msg
5860 _ShutdownInstanceDisks(self, instance)
5861 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5862 (instance.name, target_node, msg))
5865 class LUMigrateInstance(LogicalUnit):
5866 """Migrate an instance.
5868 This migrates the instance without shutting it down; failover, by
5869 contrast, requires a shutdown.
5872 HPATH = "instance-migrate"
5873 HTYPE = constants.HTYPE_INSTANCE
5878 ("cleanup", False, ht.TBool),
5883 def ExpandNames(self):
5884 self._ExpandAndLockInstance()
5886 self.needed_locks[locking.LEVEL_NODE] = []
5887 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5889 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5891 self.tasklets = [self._migrater]
5893 def DeclareLocks(self, level):
5894 if level == locking.LEVEL_NODE:
5895 self._LockInstancesNodes()
5897 def BuildHooksEnv(self):
5900 This runs on master, primary and secondary nodes of the instance.
5903 instance = self._migrater.instance
5904 source_node = instance.primary_node
5905 target_node = instance.secondary_nodes[0]
5906 env = _BuildInstanceHookEnvByObject(self, instance)
5907 env["MIGRATE_LIVE"] = self._migrater.live
5908 env["MIGRATE_CLEANUP"] = self.op.cleanup
5910 "OLD_PRIMARY": source_node,
5911 "OLD_SECONDARY": target_node,
5912 "NEW_PRIMARY": target_node,
5913 "NEW_SECONDARY": source_node,
5915 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5917 nl_post.append(source_node)
5918 return env, nl, nl_post
5921 class LUMoveInstance(LogicalUnit):
5922 """Move an instance by data-copying.
5925 HPATH = "instance-move"
5926 HTYPE = constants.HTYPE_INSTANCE
5929 ("target_node", ht.NoDefault, ht.TNonEmptyString),
5934 def ExpandNames(self):
5935 self._ExpandAndLockInstance()
5936 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5937 self.op.target_node = target_node
5938 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5939 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5941 def DeclareLocks(self, level):
5942 if level == locking.LEVEL_NODE:
5943 self._LockInstancesNodes(primary_only=True)
5945 def BuildHooksEnv(self):
5948 This runs on master, primary and secondary nodes of the instance.
5952 "TARGET_NODE": self.op.target_node,
5953 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5955 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5956 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5957 self.op.target_node]
5960 def CheckPrereq(self):
5961 """Check prerequisites.
5963 This checks that the instance is in the cluster.
5966 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5967 assert self.instance is not None, \
5968 "Cannot retrieve locked instance %s" % self.op.instance_name
5970 node = self.cfg.GetNodeInfo(self.op.target_node)
5971 assert node is not None, \
5972 "Cannot retrieve locked node %s" % self.op.target_node
5974 self.target_node = target_node = node.name
5976 if target_node == instance.primary_node:
5977 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5978 (instance.name, target_node),
5981 bep = self.cfg.GetClusterInfo().FillBE(instance)
5983 for idx, dsk in enumerate(instance.disks):
5984 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5985 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5986 " cannot copy" % idx, errors.ECODE_STATE)
5988 _CheckNodeOnline(self, target_node)
5989 _CheckNodeNotDrained(self, target_node)
5990 _CheckNodeVmCapable(self, target_node)
5992 if instance.admin_up:
5993 # check memory requirements on the target node
5994 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5995 instance.name, bep[constants.BE_MEMORY],
5996 instance.hypervisor)
5998 self.LogInfo("Not checking memory on the target node as"
5999 " instance will not be started")
6001 # check bridge existence
6002 _CheckInstanceBridgesExist(self, instance, node=target_node)
6004 def Exec(self, feedback_fn):
6005 """Move an instance.
6007 The move is done by shutting it down on its present node, copying
6008 the data over (slow) and starting it on the new node.
6011 instance = self.instance
6013 source_node = instance.primary_node
6014 target_node = self.target_node
6016 self.LogInfo("Shutting down instance %s on source node %s",
6017 instance.name, source_node)
6019 result = self.rpc.call_instance_shutdown(source_node, instance,
6020 self.op.shutdown_timeout)
6021 msg = result.fail_msg
6023 if self.op.ignore_consistency:
6024 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6025 " Proceeding anyway. Please make sure node"
6026 " %s is down. Error details: %s",
6027 instance.name, source_node, source_node, msg)
6029 raise errors.OpExecError("Could not shutdown instance %s on"
6031 (instance.name, source_node, msg))
6033 # create the target disks
6035 _CreateDisks(self, instance, target_node=target_node)
6036 except errors.OpExecError:
6037 self.LogWarning("Device creation failed, reverting...")
6039 _RemoveDisks(self, instance, target_node=target_node)
6041 self.cfg.ReleaseDRBDMinors(instance.name)
6044 cluster_name = self.cfg.GetClusterInfo().cluster_name
6047 # activate, get path, copy the data over
6048 for idx, disk in enumerate(instance.disks):
6049 self.LogInfo("Copying data for disk %d", idx)
6050 result = self.rpc.call_blockdev_assemble(target_node, disk,
6051 instance.name, True)
6053 self.LogWarning("Can't assemble newly created disk %d: %s",
6054 idx, result.fail_msg)
6055 errs.append(result.fail_msg)
6057 dev_path = result.payload
6058 result = self.rpc.call_blockdev_export(source_node, disk,
6059 target_node, dev_path,
6062 self.LogWarning("Can't copy data over for disk %d: %s",
6063 idx, result.fail_msg)
6064 errs.append(result.fail_msg)
6068 self.LogWarning("Some disks failed to copy, aborting")
6070 _RemoveDisks(self, instance, target_node=target_node)
6072 self.cfg.ReleaseDRBDMinors(instance.name)
6073 raise errors.OpExecError("Errors during disk copy: %s" %
6076 instance.primary_node = target_node
6077 self.cfg.Update(instance, feedback_fn)
6079 self.LogInfo("Removing the disks on the original node")
6080 _RemoveDisks(self, instance, target_node=source_node)
6082 # Only start the instance if it's marked as up
6083 if instance.admin_up:
6084 self.LogInfo("Starting instance %s on node %s",
6085 instance.name, target_node)
6087 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6088 ignore_secondaries=True)
6090 _ShutdownInstanceDisks(self, instance)
6091 raise errors.OpExecError("Can't activate the instance's disks")
6093 result = self.rpc.call_instance_start(target_node, instance, None, None)
6094 msg = result.fail_msg
6096 _ShutdownInstanceDisks(self, instance)
6097 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6098 (instance.name, target_node, msg))
6101 class LUMigrateNode(LogicalUnit):
6102 """Migrate all instances from a node.
6105 HPATH = "node-migrate"
6106 HTYPE = constants.HTYPE_NODE
6114 def ExpandNames(self):
6115 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6117 self.needed_locks = {
6118 locking.LEVEL_NODE: [self.op.node_name],
6121 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6123 # Create tasklets for migrating all instances on this node
6127 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6128 logging.debug("Migrating instance %s", inst.name)
6129 names.append(inst.name)
6131 tasklets.append(TLMigrateInstance(self, inst.name, False))
6133 self.tasklets = tasklets
6135 # Declare instance locks
6136 self.needed_locks[locking.LEVEL_INSTANCE] = names
6138 def DeclareLocks(self, level):
6139 if level == locking.LEVEL_NODE:
6140 self._LockInstancesNodes()
6142 def BuildHooksEnv(self):
6145 This runs on the master, the primary and all the secondaries.
6149 "NODE_NAME": self.op.node_name,
6152 nl = [self.cfg.GetMasterNode()]
6154 return (env, nl, nl)
6157 class TLMigrateInstance(Tasklet):
6158 """Tasklet class for instance migration.
6161 @ivar live: whether the migration will be done live or non-live;
6162 this variable is initialized only after CheckPrereq has run
6165 def __init__(self, lu, instance_name, cleanup):
6166 """Initializes this class.
6169 Tasklet.__init__(self, lu)
6172 self.instance_name = instance_name
6173 self.cleanup = cleanup
6174 self.live = False # will be overridden later
6176 def CheckPrereq(self):
6177 """Check prerequisites.
6179 This checks that the instance is in the cluster.
6182 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6183 instance = self.cfg.GetInstanceInfo(instance_name)
6184 assert instance is not None
6186 if instance.disk_template != constants.DT_DRBD8:
6187 raise errors.OpPrereqError("Instance's disk layout is not"
6188 " drbd8, cannot migrate.", errors.ECODE_STATE)
6190 secondary_nodes = instance.secondary_nodes
6191 if not secondary_nodes:
6192 raise errors.ConfigurationError("No secondary node but using"
6193 " drbd8 disk template")
6195 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6197 target_node = secondary_nodes[0]
6198 # check memory requirements on the secondary node
6199 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6200 instance.name, i_be[constants.BE_MEMORY],
6201 instance.hypervisor)
6203 # check bridge existence
6204 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6206 if not self.cleanup:
6207 _CheckNodeNotDrained(self.lu, target_node)
6208 result = self.rpc.call_instance_migratable(instance.primary_node,
6210 result.Raise("Can't migrate, please use failover",
6211 prereq=True, ecode=errors.ECODE_STATE)
6213 self.instance = instance
6215 if self.lu.op.live is not None and self.lu.op.mode is not None:
6216 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6217 " parameters are accepted",
6219 if self.lu.op.live is not None:
6221 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6223 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6224 # reset the 'live' parameter to None so that repeated
6225 # invocations of CheckPrereq do not raise an exception
6226 self.lu.op.live = None
6227 elif self.lu.op.mode is None:
6228 # read the default value from the hypervisor
6229 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6230 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6232 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
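# Illustrative summary (a sketch, not part of the original module) of the
# live/mode resolution implemented above:
#
#   live=None,  mode=None  -> mode taken from the hypervisor's
#                             HV_MIGRATION_MODE default
#   live=True,  mode=None  -> mode=HT_MIGRATION_LIVE
#   live=False, mode=None  -> mode=HT_MIGRATION_NONLIVE
#   live set,   mode set   -> OpPrereqError (mutually exclusive)
#
# after which self.live is simply (mode == HT_MIGRATION_LIVE).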
6234 def _WaitUntilSync(self):
6235 """Poll with custom rpc for disk sync.
6237 This uses our own step-based rpc call.
6240 self.feedback_fn("* wait until resync is done")
6244 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6246 self.instance.disks)
6248 for node, nres in result.items():
6249 nres.Raise("Cannot resync disks on node %s" % node)
6250 node_done, node_percent = nres.payload
6251 all_done = all_done and node_done
6252 if node_percent is not None:
6253 min_percent = min(min_percent, node_percent)
6255 if min_percent < 100:
6256 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6259 def _EnsureSecondary(self, node):
6260 """Demote a node to secondary.
6263 self.feedback_fn("* switching node %s to secondary mode" % node)
6265 for dev in self.instance.disks:
6266 self.cfg.SetDiskID(dev, node)
6268 result = self.rpc.call_blockdev_close(node, self.instance.name,
6269 self.instance.disks)
6270 result.Raise("Cannot change disk to secondary on node %s" % node)
6272 def _GoStandalone(self):
6273 """Disconnect from the network.
6276 self.feedback_fn("* changing into standalone mode")
6277 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6278 self.instance.disks)
6279 for node, nres in result.items():
6280 nres.Raise("Cannot disconnect disks on node %s" % node)
6282 def _GoReconnect(self, multimaster):
6283 """Reconnect to the network.
6289 msg = "single-master"
6290 self.feedback_fn("* changing disks into %s mode" % msg)
6291 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6292 self.instance.disks,
6293 self.instance.name, multimaster)
6294 for node, nres in result.items():
6295 nres.Raise("Cannot change disks config on node %s" % node)
6297 def _ExecCleanup(self):
6298 """Try to cleanup after a failed migration.
6300 The cleanup is done by:
6301 - check that the instance is running only on one node
6302 (and update the config if needed)
6303 - change disks on its secondary node to secondary
6304 - wait until disks are fully synchronized
6305 - disconnect from the network
6306 - change disks into single-master mode
6307 - wait again until disks are fully synchronized
6310 instance = self.instance
6311 target_node = self.target_node
6312 source_node = self.source_node
6314 # check running on only one node
6315 self.feedback_fn("* checking where the instance actually runs"
6316 " (if this hangs, the hypervisor might be in"
6318 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6319 for node, result in ins_l.items():
6320 result.Raise("Can't contact node %s" % node)
6322 runningon_source = instance.name in ins_l[source_node].payload
6323 runningon_target = instance.name in ins_l[target_node].payload
6325 if runningon_source and runningon_target:
6326 raise errors.OpExecError("Instance seems to be running on two nodes,"
6327 " or the hypervisor is confused. You will have"
6328 " to ensure manually that it runs only on one"
6329 " and restart this operation.")
6331 if not (runningon_source or runningon_target):
6332 raise errors.OpExecError("Instance does not seem to be running at all."
6333 " In this case, it's safer to repair by"
6334 " running 'gnt-instance stop' to ensure disk"
6335 " shutdown, and then restarting it.")
6337 if runningon_target:
6338 # the migration has actually succeeded, we need to update the config
6339 self.feedback_fn("* instance running on secondary node (%s),"
6340 " updating config" % target_node)
6341 instance.primary_node = target_node
6342 self.cfg.Update(instance, self.feedback_fn)
6343 demoted_node = source_node
6345 self.feedback_fn("* instance confirmed to be running on its"
6346 " primary node (%s)" % source_node)
6347 demoted_node = target_node
6349 self._EnsureSecondary(demoted_node)
6351 self._WaitUntilSync()
6352 except errors.OpExecError:
6353 # we ignore errors here, since if the device is standalone, it
6354 # won't be able to sync
6356 self._GoStandalone()
6357 self._GoReconnect(False)
6358 self._WaitUntilSync()
6360 self.feedback_fn("* done")
6362 def _RevertDiskStatus(self):
6363 """Try to revert the disk status after a failed migration.
6366 target_node = self.target_node
6368 self._EnsureSecondary(target_node)
6369 self._GoStandalone()
6370 self._GoReconnect(False)
6371 self._WaitUntilSync()
6372 except errors.OpExecError, err:
6373 self.lu.LogWarning("Migration failed and I can't reconnect the"
6374 " drives: error '%s'\n"
6375 "Please look and recover the instance status" %
6378 def _AbortMigration(self):
6379 """Call the hypervisor code to abort a started migration.
6382 instance = self.instance
6383 target_node = self.target_node
6384 migration_info = self.migration_info
6386 abort_result = self.rpc.call_finalize_migration(target_node,
6390 abort_msg = abort_result.fail_msg
6392 logging.error("Aborting migration failed on target node %s: %s",
6393 target_node, abort_msg)
6394 # Don't raise an exception here, as we still have to try to revert the
6395 # disk status, even if this step failed.
6397 def _ExecMigration(self):
6398 """Migrate an instance.
6400 The migration is done by:
6401 - change the disks into dual-master mode
6402 - wait until disks are fully synchronized again
6403 - migrate the instance
6404 - change disks on the new secondary node (the old primary) to secondary
6405 - wait until disks are fully synchronized
6406 - change disks into single-master mode
6409 instance = self.instance
6410 target_node = self.target_node
6411 source_node = self.source_node
6413 self.feedback_fn("* checking disk consistency between source and target")
6414 for dev in instance.disks:
6415 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6416 raise errors.OpExecError("Disk %s is degraded or not fully"
6417 " synchronized on target node,"
6418 " aborting migrate." % dev.iv_name)
6420 # First get the migration information from the remote node
6421 result = self.rpc.call_migration_info(source_node, instance)
6422 msg = result.fail_msg
6424 log_err = ("Failed fetching source migration information from %s: %s" %
6426 logging.error(log_err)
6427 raise errors.OpExecError(log_err)
6429 self.migration_info = migration_info = result.payload
6431 # Then switch the disks to master/master mode
6432 self._EnsureSecondary(target_node)
6433 self._GoStandalone()
6434 self._GoReconnect(True)
6435 self._WaitUntilSync()
6437 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6438 result = self.rpc.call_accept_instance(target_node,
6441 self.nodes_ip[target_node])
6443 msg = result.fail_msg
6445 logging.error("Instance pre-migration failed, trying to revert"
6446 " disk status: %s", msg)
6447 self.feedback_fn("Pre-migration failed, aborting")
6448 self._AbortMigration()
6449 self._RevertDiskStatus()
6450 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6451 (instance.name, msg))
6453 self.feedback_fn("* migrating instance to %s" % target_node)
6455 result = self.rpc.call_instance_migrate(source_node, instance,
6456 self.nodes_ip[target_node],
6458 msg = result.fail_msg
6460 logging.error("Instance migration failed, trying to revert"
6461 " disk status: %s", msg)
6462 self.feedback_fn("Migration failed, aborting")
6463 self._AbortMigration()
6464 self._RevertDiskStatus()
6465 raise errors.OpExecError("Could not migrate instance %s: %s" %
6466 (instance.name, msg))
6469 instance.primary_node = target_node
6470 # distribute new instance config to the other nodes
6471 self.cfg.Update(instance, self.feedback_fn)
6473 result = self.rpc.call_finalize_migration(target_node,
6477 msg = result.fail_msg
6479 logging.error("Instance migration succeeded, but finalization failed:"
6481 raise errors.OpExecError("Could not finalize instance migration: %s" %
6484 self._EnsureSecondary(source_node)
6485 self._WaitUntilSync()
6486 self._GoStandalone()
6487 self._GoReconnect(False)
6488 self._WaitUntilSync()
6490 self.feedback_fn("* done")
6492 def Exec(self, feedback_fn):
6493 """Perform the migration.
6496 feedback_fn("Migrating instance %s" % self.instance.name)
6498 self.feedback_fn = feedback_fn
6500 self.source_node = self.instance.primary_node
6501 self.target_node = self.instance.secondary_nodes[0]
6502 self.all_nodes = [self.source_node, self.target_node]
6504 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6505 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6509 return self._ExecCleanup()
6511 return self._ExecMigration()
6514 def _CreateBlockDev(lu, node, instance, device, force_create,
6516 """Create a tree of block devices on a given node.
6518 If this device type has to be created on secondaries, create it and all its children.
6521 If not, just recurse to children keeping the same 'force' value.
6523 @param lu: the lu on whose behalf we execute
6524 @param node: the node on which to create the device
6525 @type instance: L{objects.Instance}
6526 @param instance: the instance which owns the device
6527 @type device: L{objects.Disk}
6528 @param device: the device to create
6529 @type force_create: boolean
6530 @param force_create: whether to force creation of this device; this
6531 will be changed to True whenever we find a device whose
6532 CreateOnSecondary() method returns True
6533 @param info: the extra 'metadata' we should attach to the device
6534 (this will be represented as an LVM tag)
6535 @type force_open: boolean
6536 @param force_open: this parameter will be passed to the
6537 L{backend.BlockdevCreate} function where it specifies
6538 whether we run on primary or not, and it affects both
6539 the child assembly and the device's own Open() execution
6542 if device.CreateOnSecondary():
6546 for child in device.children:
6547 _CreateBlockDev(lu, node, instance, child, force_create,
6550 if not force_create:
6553 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
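# Illustrative sketch (hypothetical node name and variables): for a DRBD8
# disk, CreateOnSecondary() is true, so force_create flips to True and the
# whole tree is created even on the secondary; a plain LV keeps the
# caller's force_create value and is thus skipped on secondaries:
#
#   # drbd_dev.children == [dev_data, dev_meta], both LVs
#   _CreateBlockDev(lu, "node2.example.com", inst, drbd_dev,
#                   force_create=False, info=info, force_open=False)
#   # -> creates dev_data, dev_meta, then drbd_dev itself on node2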
6556 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6557 """Create a single block device on a given node.
6559 This will not recurse over children of the device, so they must be
6562 @param lu: the lu on whose behalf we execute
6563 @param node: the node on which to create the device
6564 @type instance: L{objects.Instance}
6565 @param instance: the instance which owns the device
6566 @type device: L{objects.Disk}
6567 @param device: the device to create
6568 @param info: the extra 'metadata' we should attach to the device
6569 (this will be represented as an LVM tag)
6570 @type force_open: boolean
6571 @param force_open: this parameter will be passed to the
6572 L{backend.BlockdevCreate} function where it specifies
6573 whether we run on primary or not, and it affects both
6574 the child assembly and the device's own Open() execution
6577 lu.cfg.SetDiskID(device, node)
6578 result = lu.rpc.call_blockdev_create(node, device, device.size,
6579 instance.name, force_open, info)
6580 result.Raise("Can't create block device %s on"
6581 " node %s for instance %s" % (device, node, instance.name))
6582 if device.physical_id is None:
6583 device.physical_id = result.payload
6586 def _GenerateUniqueNames(lu, exts):
6587 """Generate a suitable LV name.
6589 This will generate a logical volume name for the given instance.
6594 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6595 results.append("%s%s" % (new_id, val))
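# Example (a sketch; the UUIDs are made up): a fresh unique id is generated
# for every extension, so sibling names do not share a prefix:
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["<uuid-a>.disk0", "<uuid-b>.disk1"]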
6599 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6601 """Generate a drbd8 device complete with its children.
6604 port = lu.cfg.AllocatePort()
6605 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6606 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6607 logical_id=(vgname, names[0]))
6608 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6609 logical_id=(vgname, names[1]))
6610 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6611 logical_id=(primary, secondary, port,
6614 children=[dev_data, dev_meta],
6619 def _GenerateDiskTemplate(lu, template_name,
6620 instance_name, primary_node,
6621 secondary_nodes, disk_info,
6622 file_storage_dir, file_driver,
6623 base_index, feedback_fn):
6624 """Generate the entire disk layout for a given template type.
6627 # TODO: compute space requirements
6629 vgname = lu.cfg.GetVGName()
6630 disk_count = len(disk_info)
6632 if template_name == constants.DT_DISKLESS:
6634 elif template_name == constants.DT_PLAIN:
6635 if len(secondary_nodes) != 0:
6636 raise errors.ProgrammerError("Wrong template configuration")
6638 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6639 for i in range(disk_count)])
6640 for idx, disk in enumerate(disk_info):
6641 disk_index = idx + base_index
6642 vg = disk.get("vg", vgname)
6643 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6644 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6645 logical_id=(vg, names[idx]),
6646 iv_name="disk/%d" % disk_index,
6648 disks.append(disk_dev)
6649 elif template_name == constants.DT_DRBD8:
6650 if len(secondary_nodes) != 1:
6651 raise errors.ProgrammerError("Wrong template configuration")
6652 remote_node = secondary_nodes[0]
6653 minors = lu.cfg.AllocateDRBDMinor(
6654 [primary_node, remote_node] * len(disk_info), instance_name)
6657 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6658 for i in range(disk_count)]):
6659 names.append(lv_prefix + "_data")
6660 names.append(lv_prefix + "_meta")
6661 for idx, disk in enumerate(disk_info):
6662 disk_index = idx + base_index
6663 vg = disk.get("vg", vgname)
6664 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6665 disk["size"], vg, names[idx*2:idx*2+2],
6666 "disk/%d" % disk_index,
6667 minors[idx*2], minors[idx*2+1])
6668 disk_dev.mode = disk["mode"]
6669 disks.append(disk_dev)
6670 elif template_name == constants.DT_FILE:
6671 if len(secondary_nodes) != 0:
6672 raise errors.ProgrammerError("Wrong template configuration")
6674 _RequireFileStorage()
6676 for idx, disk in enumerate(disk_info):
6677 disk_index = idx + base_index
6678 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6679 iv_name="disk/%d" % disk_index,
6680 logical_id=(file_driver,
6681 "%s/disk%d" % (file_storage_dir,
6684 disks.append(disk_dev)
6686 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
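# Worked example for DT_DRBD8 with two disks (hypothetical names/minors):
# every unique prefix is expanded into a data and a metadata LV, and DRBD
# minors are allocated pairwise, (primary, secondary) per disk:
#
#   names  = ["<uuid-a>.disk0_data", "<uuid-a>.disk0_meta",
#             "<uuid-b>.disk1_data", "<uuid-b>.disk1_meta"]
#   minors = [0, 0, 1, 1]   # disk0 -> minors[0:2], disk1 -> minors[2:4]
#
# with each metadata LV a fixed 128 MiB (see _GenerateDRBD8Branch above).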
6690 def _GetInstanceInfoText(instance):
6691 """Compute that text that should be added to the disk's metadata.
6694 return "originstname+%s" % instance.name
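# Example: for an instance named "web1.example.com" (hypothetical), the
# resulting LVM tag would be "originstname+web1.example.com".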
6697 def _CalcEta(time_taken, written, total_size):
6698 """Calculates the ETA based on size written and total size.
6700 @param time_taken: The time taken so far
6701 @param written: amount written so far
6702 @param total_size: The total size of data to be written
6703 @return: The remaining time in seconds
6706 avg_time = time_taken / float(written)
6707 return (total_size - written) * avg_time
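# Worked example (hypothetical numbers): after 120s spent writing 2048 of
# 10240 units, the linear extrapolation above yields
#
#   _CalcEta(120.0, 2048, 10240) == (10240 - 2048) * (120.0 / 2048) == 480.0
#
# i.e. eight more minutes; note that the formula assumes written > 0.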
6710 def _WipeDisks(lu, instance):
6711 """Wipes instance disks.
6713 @type lu: L{LogicalUnit}
6714 @param lu: the logical unit on whose behalf we execute
6715 @type instance: L{objects.Instance}
6716 @param instance: the instance whose disks we should wipe
6717 @return: the success of the wipe
6720 node = instance.primary_node
6721 for idx, device in enumerate(instance.disks):
6722 lu.LogInfo("* Wiping disk %d", idx)
6723 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6725 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6726 # MAX_WIPE_CHUNK at max
6727 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6728 constants.MIN_WIPE_CHUNK_PERCENT)
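# Worked example (assuming the illustrative values MAX_WIPE_CHUNK = 1024
# MiB and MIN_WIPE_CHUNK_PERCENT = 10): a 5000 MiB disk is wiped in
# min(1024, 5000 / 100.0 * 10) = 500 MiB chunks, while a 102400 MiB disk
# is capped at 1024 MiB per chunk.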
6733 start_time = time.time()
6735 while offset < size:
6736 wipe_size = min(wipe_chunk_size, size - offset)
6737 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6738 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6739 (idx, offset, wipe_size))
6742 if now - last_output >= 60:
6743 eta = _CalcEta(now - start_time, offset, size)
6744 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6745 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6749 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6750 """Create all disks for an instance.
6752 This abstracts away some work from AddInstance.
6754 @type lu: L{LogicalUnit}
6755 @param lu: the logical unit on whose behalf we execute
6756 @type instance: L{objects.Instance}
6757 @param instance: the instance whose disks we should create
6759 @param to_skip: list of indices to skip
6760 @type target_node: string
6761 @param target_node: if passed, overrides the target node for creation
6763 @return: the success of the creation
6766 info = _GetInstanceInfoText(instance)
6767 if target_node is None:
6768 pnode = instance.primary_node
6769 all_nodes = instance.all_nodes
6774 if instance.disk_template == constants.DT_FILE:
6775 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6776 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6778 result.Raise("Failed to create directory '%s' on"
6779 " node %s" % (file_storage_dir, pnode))
6781 # Note: this needs to be kept in sync with adding of disks in
6782 # LUSetInstanceParams
6783 for idx, device in enumerate(instance.disks):
6784 if to_skip and idx in to_skip:
6786 logging.info("Creating volume %s for instance %s",
6787 device.iv_name, instance.name)
6789 for node in all_nodes:
6790 f_create = node == pnode
6791 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6794 def _RemoveDisks(lu, instance, target_node=None):
6795 """Remove all disks for an instance.
6797 This abstracts away some work from `AddInstance()` and
6798 `RemoveInstance()`. Note that in case some of the devices couldn't
6799 be removed, the removal will continue with the other ones (compare
6800 with `_CreateDisks()`).
6802 @type lu: L{LogicalUnit}
6803 @param lu: the logical unit on whose behalf we execute
6804 @type instance: L{objects.Instance}
6805 @param instance: the instance whose disks we should remove
6806 @type target_node: string
6807 @param target_node: used to override the node on which to remove the disks
6809 @return: the success of the removal
6812 logging.info("Removing block devices for instance %s", instance.name)
6815 for device in instance.disks:
6817 edata = [(target_node, device)]
6819 edata = device.ComputeNodeTree(instance.primary_node)
6820 for node, disk in edata:
6821 lu.cfg.SetDiskID(disk, node)
6822 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6824 lu.LogWarning("Could not remove block device %s on node %s,"
6825 " continuing anyway: %s", device.iv_name, node, msg)
6828 if instance.disk_template == constants.DT_FILE:
6829 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6833 tgt = instance.primary_node
6834 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6836 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6837 file_storage_dir, instance.primary_node, result.fail_msg)
6843 def _ComputeDiskSizePerVG(disk_template, disks):
6844 """Compute disk size requirements in the volume group
6847 def _compute(disks, payload):
6848 """Universal algorithm
6853 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6857 # Required free disk space as a function of disk and swap space
6859 constants.DT_DISKLESS: None,
6860 constants.DT_PLAIN: _compute(disks, 0),
6861 # 128 MB are added for drbd metadata for each disk
6862 constants.DT_DRBD8: _compute(disks, 128),
6863 constants.DT_FILE: None,
6866 if disk_template not in req_size_dict:
6867 raise errors.ProgrammerError("Disk template '%s' size requirement"
6868 " is unknown" % disk_template)
6870 return req_size_dict[disk_template]
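# Worked example (hypothetical VG names): for DT_DRBD8 with
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048},
#            {"vg": "ssdvg", "size": 512}]
# the 128 MiB metadata payload is added per disk, giving
#   {"xenvg": 1024 + 128 + 2048 + 128, "ssdvg": 512 + 128}
#   == {"xenvg": 3328, "ssdvg": 640}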
6873 def _ComputeDiskSize(disk_template, disks):
6874 """Compute disk size requirements in the volume group
6877 # Required free disk space as a function of disk and swap space
6879 constants.DT_DISKLESS: None,
6880 constants.DT_PLAIN: sum(d["size"] for d in disks),
6881 # 128 MB are added for drbd metadata for each disk
6882 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6883 constants.DT_FILE: None,
6886 if disk_template not in req_size_dict:
6887 raise errors.ProgrammerError("Disk template '%s' size requirement"
6888 " is unknown" % disk_template)
6890 return req_size_dict[disk_template]
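# Worked example: for two disks of 1024 and 2048 MiB, DT_PLAIN requires
# 1024 + 2048 = 3072 MiB, while DT_DRBD8 requires
# (1024 + 128) + (2048 + 128) = 3328 MiB; DT_DISKLESS and DT_FILE have no
# volume group requirement (None).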
6893 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6894 """Hypervisor parameter validation.
6896 This function abstracts the hypervisor parameter validation to be
6897 used in both instance create and instance modify.
6899 @type lu: L{LogicalUnit}
6900 @param lu: the logical unit for which we check
6901 @type nodenames: list
6902 @param nodenames: the list of nodes on which we should check
6903 @type hvname: string
6904 @param hvname: the name of the hypervisor we should use
6905 @type hvparams: dict
6906 @param hvparams: the parameters which we need to check
6907 @raise errors.OpPrereqError: if the parameters are not valid
6910 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6913 for node in nodenames:
6917 info.Raise("Hypervisor parameter validation failed on node %s" % node)
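# Typical use (an illustrative sketch; 'nodenames' and 'filled_hvp' are
# assumed to have been computed by the caller, as during instance
# creation):
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, filled_hvp)
#
# which raises an error as soon as any node rejects the parameters.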
6920 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6921 """OS parameters validation.
6923 @type lu: L{LogicalUnit}
6924 @param lu: the logical unit for which we check
6925 @type required: boolean
6926 @param required: whether the validation should fail if the OS is not found
6928 @type nodenames: list
6929 @param nodenames: the list of nodes on which we should check
6930 @type osname: string
6931 @param osname: the name of the OS we should use
6932 @type osparams: dict
6933 @param osparams: the parameters which we need to check
6934 @raise errors.OpPrereqError: if the parameters are not valid
6937 result = lu.rpc.call_os_validate(required, nodenames, osname,
6938 [constants.OS_VALIDATE_PARAMETERS],
6940 for node, nres in result.items():
6941 # we don't check for offline cases since this should be run only
6942 # against the master node and/or an instance's nodes
6943 nres.Raise("OS Parameters validation failed on node %s" % node)
6944 if not nres.payload:
6945 lu.LogInfo("OS %s not found on node %s, validation skipped",
6949 class LUCreateInstance(LogicalUnit):
6950 """Create an instance.
6953 HPATH = "instance-add"
6954 HTYPE = constants.HTYPE_INSTANCE
6957 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6958 ("start", True, ht.TBool),
6959 ("wait_for_sync", True, ht.TBool),
6960 ("ip_check", True, ht.TBool),
6961 ("name_check", True, ht.TBool),
6962 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6963 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6964 ("hvparams", ht.EmptyDict, ht.TDict),
6965 ("beparams", ht.EmptyDict, ht.TDict),
6966 ("osparams", ht.EmptyDict, ht.TDict),
6967 ("no_install", None, ht.TMaybeBool),
6968 ("os_type", None, ht.TMaybeString),
6969 ("force_variant", False, ht.TBool),
6970 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6971 ("source_x509_ca", None, ht.TMaybeString),
6972 ("source_instance_name", None, ht.TMaybeString),
6973 ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
6975 ("src_node", None, ht.TMaybeString),
6976 ("src_path", None, ht.TMaybeString),
6977 ("pnode", None, ht.TMaybeString),
6978 ("snode", None, ht.TMaybeString),
6979 ("iallocator", None, ht.TMaybeString),
6980 ("hypervisor", None, ht.TMaybeString),
6981 ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6982 ("identify_defaults", False, ht.TBool),
6983 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6984 ("file_storage_dir", None, ht.TMaybeString),
6988 def CheckArguments(self):
6992 # do not require name_check to ease forward/backward compatibility
6994 if self.op.no_install and self.op.start:
6995 self.LogInfo("No-installation mode selected, disabling startup")
6996 self.op.start = False
6997 # validate/normalize the instance name
6998 self.op.instance_name = \
6999 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7001 if self.op.ip_check and not self.op.name_check:
7002 # TODO: make the ip check more flexible and not depend on the name check
7003 raise errors.OpPrereqError("Cannot do ip check without a name check",
7006 # check nics' parameter names
7007 for nic in self.op.nics:
7008 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7010 # check disks: parameter names and consistent adopt/no-adopt strategy
7011 has_adopt = has_no_adopt = False
7012 for disk in self.op.disks:
7013 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7018 if has_adopt and has_no_adopt:
7019 raise errors.OpPrereqError("Either all disks are adopted or none is",
7022 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7023 raise errors.OpPrereqError("Disk adoption is not supported for the"
7024 " '%s' disk template" %
7025 self.op.disk_template,
7027 if self.op.iallocator is not None:
7028 raise errors.OpPrereqError("Disk adoption not allowed with an"
7029 " iallocator script", errors.ECODE_INVAL)
7030 if self.op.mode == constants.INSTANCE_IMPORT:
7031 raise errors.OpPrereqError("Disk adoption not allowed for"
7032 " instance import", errors.ECODE_INVAL)
7034 self.adopt_disks = has_adopt
7036 # instance name verification
7037 if self.op.name_check:
7038 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7039 self.op.instance_name = self.hostname1.name
7040 # used in CheckPrereq for ip ping check
7041 self.check_ip = self.hostname1.ip
7043 self.check_ip = None
7045 # file storage checks
7046 if (self.op.file_driver and
7047 not self.op.file_driver in constants.FILE_DRIVER):
7048 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7049 self.op.file_driver, errors.ECODE_INVAL)
7051 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7052 raise errors.OpPrereqError("File storage directory path must not be absolute",
7055 ### Node/iallocator related checks
7056 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7058 if self.op.pnode is not None:
7059 if self.op.disk_template in constants.DTS_NET_MIRROR:
7060 if self.op.snode is None:
7061 raise errors.OpPrereqError("The networked disk templates need"
7062 " a mirror node", errors.ECODE_INVAL)
7064 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7066 self.op.snode = None
7068 self._cds = _GetClusterDomainSecret()
7070 if self.op.mode == constants.INSTANCE_IMPORT:
7071 # On import force_variant must be True, because if we forced it at
7072 # initial install, our only chance when importing it back is that it
7074 self.op.force_variant = True
7076 if self.op.no_install:
7077 self.LogInfo("No-installation mode has no effect during import")
7079 elif self.op.mode == constants.INSTANCE_CREATE:
7080 if self.op.os_type is None:
7081 raise errors.OpPrereqError("No guest OS specified",
7083 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7084 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7085 " installation" % self.op.os_type,
7087 if self.op.disk_template is None:
7088 raise errors.OpPrereqError("No disk template specified",
7091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7092 # Check handshake to ensure both clusters have the same domain secret
7093 src_handshake = self.op.source_handshake
7094 if not src_handshake:
7095 raise errors.OpPrereqError("Missing source handshake",
7098 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7101 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7104 # Load and check source CA
7105 self.source_x509_ca_pem = self.op.source_x509_ca
7106 if not self.source_x509_ca_pem:
7107 raise errors.OpPrereqError("Missing source X509 CA",
7111 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7113 except OpenSSL.crypto.Error, err:
7114 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7115 (err, ), errors.ECODE_INVAL)
7117 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7118 if errcode is not None:
7119 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7122 self.source_x509_ca = cert
7124 src_instance_name = self.op.source_instance_name
7125 if not src_instance_name:
7126 raise errors.OpPrereqError("Missing source instance name",
7129 self.source_instance_name = \
7130 netutils.GetHostname(name=src_instance_name).name
7133 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7134 self.op.mode, errors.ECODE_INVAL)
7136 def ExpandNames(self):
7137 """ExpandNames for CreateInstance.
7139 Figure out the right locks for instance creation.
7142 self.needed_locks = {}
7144 instance_name = self.op.instance_name
7145 # this is just a preventive check, but someone might still add this
7146 # instance in the meantime, and creation will fail at lock-add time
7147 if instance_name in self.cfg.GetInstanceList():
7148 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7149 instance_name, errors.ECODE_EXISTS)
7151 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7153 if self.op.iallocator:
7154 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7156 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7157 nodelist = [self.op.pnode]
7158 if self.op.snode is not None:
7159 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7160 nodelist.append(self.op.snode)
7161 self.needed_locks[locking.LEVEL_NODE] = nodelist
7163 # in case of import lock the source node too
7164 if self.op.mode == constants.INSTANCE_IMPORT:
7165 src_node = self.op.src_node
7166 src_path = self.op.src_path
7168 if src_path is None:
7169 self.op.src_path = src_path = self.op.instance_name
7171 if src_node is None:
7172 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7173 self.op.src_node = None
7174 if os.path.isabs(src_path):
7175 raise errors.OpPrereqError("Importing an instance from an absolute"
7176 " path requires a source node option.",
7179 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7180 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7181 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7182 if not os.path.isabs(src_path):
7183 self.op.src_path = src_path = \
7184 utils.PathJoin(constants.EXPORT_DIR, src_path)
7186 def _RunAllocator(self):
7187 """Run the allocator based on input opcode.
7190 nics = [n.ToDict() for n in self.nics]
7191 ial = IAllocator(self.cfg, self.rpc,
7192 mode=constants.IALLOCATOR_MODE_ALLOC,
7193 name=self.op.instance_name,
7194 disk_template=self.op.disk_template,
7197 vcpus=self.be_full[constants.BE_VCPUS],
7198 mem_size=self.be_full[constants.BE_MEMORY],
7201 hypervisor=self.op.hypervisor,
7204 ial.Run(self.op.iallocator)
7207 raise errors.OpPrereqError("Can't compute nodes using"
7208 " iallocator '%s': %s" %
7209 (self.op.iallocator, ial.info),
7211 if len(ial.result) != ial.required_nodes:
7212 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7213 " of nodes (%s), required %s" %
7214 (self.op.iallocator, len(ial.result),
7215 ial.required_nodes), errors.ECODE_FAULT)
7216 self.op.pnode = ial.result[0]
7217 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7218 self.op.instance_name, self.op.iallocator,
7219 utils.CommaJoin(ial.result))
7220 if ial.required_nodes == 2:
7221 self.op.snode = ial.result[1]
7223 def BuildHooksEnv(self):
7226 This runs on master, primary and secondary nodes of the instance.
7230 "ADD_MODE": self.op.mode,
7232 if self.op.mode == constants.INSTANCE_IMPORT:
7233 env["SRC_NODE"] = self.op.src_node
7234 env["SRC_PATH"] = self.op.src_path
7235 env["SRC_IMAGES"] = self.src_images
7237 env.update(_BuildInstanceHookEnv(
7238 name=self.op.instance_name,
7239 primary_node=self.op.pnode,
7240 secondary_nodes=self.secondaries,
7241 status=self.op.start,
7242 os_type=self.op.os_type,
7243 memory=self.be_full[constants.BE_MEMORY],
7244 vcpus=self.be_full[constants.BE_VCPUS],
7245 nics=_NICListToTuple(self, self.nics),
7246 disk_template=self.op.disk_template,
7247 disks=[(d["size"], d["mode"]) for d in self.disks],
7250 hypervisor_name=self.op.hypervisor,
7253 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7257 def _ReadExportInfo(self):
7258 """Reads the export information from disk.
7260 It will override the opcode source node and path with the actual
7261 information, if these two were not specified before.
7263 @return: the export information
7266 assert self.op.mode == constants.INSTANCE_IMPORT
7268 src_node = self.op.src_node
7269 src_path = self.op.src_path
7271 if src_node is None:
7272 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7273 exp_list = self.rpc.call_export_list(locked_nodes)
7275 for node in exp_list:
7276 if exp_list[node].fail_msg:
7278 if src_path in exp_list[node].payload:
7280 self.op.src_node = src_node = node
7281 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7285 raise errors.OpPrereqError("No export found for relative path %s" %
7286 src_path, errors.ECODE_INVAL)
7288 _CheckNodeOnline(self, src_node)
7289 result = self.rpc.call_export_info(src_node, src_path)
7290 result.Raise("No export or invalid export found in dir %s" % src_path)
7292 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7293 if not export_info.has_section(constants.INISECT_EXP):
7294 raise errors.ProgrammerError("Corrupted export config",
7295 errors.ECODE_ENVIRON)
7297 ei_version = export_info.get(constants.INISECT_EXP, "version")
7298 if (int(ei_version) != constants.EXPORT_VERSION):
7299 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7300 (ei_version, constants.EXPORT_VERSION),
7301 errors.ECODE_ENVIRON)
7304 def _ReadExportParams(self, einfo):
7305 """Use export parameters as defaults.
7307 If the opcode doesn't specify (i.e. override) some instance
7308 parameters, try to take them from the export information, if
7312 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7314 if self.op.disk_template is None:
7315 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7316 self.op.disk_template = einfo.get(constants.INISECT_INS,
7319 raise errors.OpPrereqError("No disk template specified and the export"
7320 " is missing the disk_template information",
7323 if not self.op.disks:
7324 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7326 # TODO: import the disk iv_name too
7327 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7328 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7329 disks.append({"size": disk_sz})
7330 self.op.disks = disks
7332 raise errors.OpPrereqError("No disk info specified and the export"
7333 " is missing the disk information",
7336 if (not self.op.nics and
7337 einfo.has_option(constants.INISECT_INS, "nic_count")):
7339 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7341 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7342 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7347 if (self.op.hypervisor is None and
7348 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7349 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7350 if einfo.has_section(constants.INISECT_HYP):
7351 # use the export parameters but do not override the ones
7352 # specified by the user
7353 for name, value in einfo.items(constants.INISECT_HYP):
7354 if name not in self.op.hvparams:
7355 self.op.hvparams[name] = value
7357 if einfo.has_section(constants.INISECT_BEP):
7358 # use the parameters, without overriding
7359 for name, value in einfo.items(constants.INISECT_BEP):
7360 if name not in self.op.beparams:
7361 self.op.beparams[name] = value
7363 # try to read the parameters old style, from the main section
7364 for name in constants.BES_PARAMETERS:
7365 if (name not in self.op.beparams and
7366 einfo.has_option(constants.INISECT_INS, name)):
7367 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7369 if einfo.has_section(constants.INISECT_OSP):
7370 # use the parameters, without overriding
7371 for name, value in einfo.items(constants.INISECT_OSP):
7372 if name not in self.op.osparams:
7373 self.op.osparams[name] = value
7375 def _RevertToDefaults(self, cluster):
7376 """Revert the instance parameters to the default values.
7380 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7381 for name in self.op.hvparams.keys():
7382 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7383 del self.op.hvparams[name]
7385 be_defs = cluster.SimpleFillBE({})
7386 for name in self.op.beparams.keys():
7387 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7388 del self.op.beparams[name]
7390 nic_defs = cluster.SimpleFillNIC({})
7391 for nic in self.op.nics:
7392 for name in constants.NICS_PARAMETERS:
7393 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7396 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7397 for name in self.op.osparams.keys():
7398 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7399 del self.op.osparams[name]
7401 def CheckPrereq(self):
7402 """Check prerequisites.
7405 if self.op.mode == constants.INSTANCE_IMPORT:
7406 export_info = self._ReadExportInfo()
7407 self._ReadExportParams(export_info)
7409 _CheckDiskTemplate(self.op.disk_template)
7411 if (not self.cfg.GetVGName() and
7412 self.op.disk_template not in constants.DTS_NOT_LVM):
7413 raise errors.OpPrereqError("Cluster does not support lvm-based"
7414 " instances", errors.ECODE_STATE)
7416 if self.op.hypervisor is None:
7417 self.op.hypervisor = self.cfg.GetHypervisorType()
7419 cluster = self.cfg.GetClusterInfo()
7420 enabled_hvs = cluster.enabled_hypervisors
7421 if self.op.hypervisor not in enabled_hvs:
7422 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7423 " cluster (%s)" % (self.op.hypervisor,
7424 ",".join(enabled_hvs)),
7427 # check hypervisor parameter syntax (locally)
7428 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7429 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7431 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7432 hv_type.CheckParameterSyntax(filled_hvp)
7433 self.hv_full = filled_hvp
7434 # check that we don't specify global parameters on an instance
7435 _CheckGlobalHvParams(self.op.hvparams)
7437 # fill and remember the beparams dict
7438 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7439 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7441 # build os parameters
7442 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7444 # now that hvp/bep are in final format, let's reset to defaults,
7446 if self.op.identify_defaults:
7447 self._RevertToDefaults(cluster)
7451 for idx, nic in enumerate(self.op.nics):
7452 nic_mode_req = nic.get("mode", None)
7453 nic_mode = nic_mode_req
7454 if nic_mode is None:
7455 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7457 # in routed mode, for the first nic, the default ip is 'auto'
7458 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7459 default_ip_mode = constants.VALUE_AUTO
7461 default_ip_mode = constants.VALUE_NONE
7463 # ip validity checks
7464 ip = nic.get("ip", default_ip_mode)
7465 if ip is None or ip.lower() == constants.VALUE_NONE:
7467 elif ip.lower() == constants.VALUE_AUTO:
7468 if not self.op.name_check:
7469 raise errors.OpPrereqError("IP address set to auto but name checks"
7470 " have been skipped",
7472 nic_ip = self.hostname1.ip
7474 if not netutils.IPAddress.IsValid(ip):
7475 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7479 # TODO: check the ip address for uniqueness
7480 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7481 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7484 # MAC address verification
7485 mac = nic.get("mac", constants.VALUE_AUTO)
7486 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7487 mac = utils.NormalizeAndValidateMac(mac)
7490 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7491 except errors.ReservationError:
7492 raise errors.OpPrereqError("MAC address %s already in use"
7493 " in cluster" % mac,
7494 errors.ECODE_NOTUNIQUE)
7496 # bridge verification
7497 bridge = nic.get("bridge", None)
7498 link = nic.get("link", None)
7500 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7501 " at the same time", errors.ECODE_INVAL)
7502 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7503 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7510 nicparams[constants.NIC_MODE] = nic_mode_req
7512 nicparams[constants.NIC_LINK] = link
7514 check_params = cluster.SimpleFillNIC(nicparams)
7515 objects.NIC.CheckParameterSyntax(check_params)
7516 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
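# Illustrative sketch (hypothetical values): an input nic dict such as
#   {"mode": constants.NIC_MODE_BRIDGED, "link": "xen-br0", "mac": "auto"}
# is stored as objects.NIC(mac="auto", ip=None,
#                          nicparams={"mode": "bridged", "link": "xen-br0"}),
# with the "auto" MAC replaced by a generated one further down.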
7518 # disk checks/pre-build
7520 for disk in self.op.disks:
7521 mode = disk.get("mode", constants.DISK_RDWR)
7522 if mode not in constants.DISK_ACCESS_SET:
7523 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7524 mode, errors.ECODE_INVAL)
7525 size = disk.get("size", None)
7527 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7530 except (TypeError, ValueError):
7531 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7533 vg = disk.get("vg", self.cfg.GetVGName())
7534 new_disk = {"size": size, "mode": mode, "vg": vg}
7536 new_disk["adopt"] = disk["adopt"]
7537 self.disks.append(new_disk)
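# Illustrative sketch (assuming a hypothetical cluster default VG "xenvg"):
# an input disk dict {"size": 1024, "mode": "rw"} is normalized to
#   {"size": 1024, "mode": "rw", "vg": "xenvg"}
# plus an "adopt" key when disk adoption was requested.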
7539 if self.op.mode == constants.INSTANCE_IMPORT:
7541 # Check that the new instance doesn't have less disks than the export
7542 instance_disks = len(self.disks)
7543 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7544 if instance_disks < export_disks:
7545 raise errors.OpPrereqError("Not enough disks to import."
7546 " (instance: %d, export: %d)" %
7547 (instance_disks, export_disks),
7551 for idx in range(export_disks):
7552 option = 'disk%d_dump' % idx
7553 if export_info.has_option(constants.INISECT_INS, option):
7554 # FIXME: are the old os-es, disk sizes, etc. useful?
7555 export_name = export_info.get(constants.INISECT_INS, option)
7556 image = utils.PathJoin(self.op.src_path, export_name)
7557 disk_images.append(image)
7558 else:
7559 disk_images.append(False)
7561 self.src_images = disk_images
7563 old_name = export_info.get(constants.INISECT_INS, 'name')
7564 try:
7565 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7566 except (TypeError, ValueError), err:
7567 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7568 " an integer: %s" % str(err),
7569 errors.ECODE_STATE)
7570 if self.op.instance_name == old_name:
7571 for idx, nic in enumerate(self.nics):
7572 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7573 nic_mac_ini = 'nic%d_mac' % idx
7574 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7576 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7578 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7579 if self.op.ip_check:
7580 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7581 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7582 (self.check_ip, self.op.instance_name),
7583 errors.ECODE_NOTUNIQUE)
7585 #### mac address generation
7586 # By generating the mac address here, both the allocator and the hooks get
7587 # the real final mac address rather than the 'auto' or 'generate' value.
7588 # There is a race condition between the generation and the instance object
7589 # creation, which means that we know the mac is valid now, but we're not
7590 # sure it will be when we actually add the instance. If things go bad
7591 # adding the instance will abort because of a duplicate mac, and the
7592 # creation job will fail.
7593 for nic in self.nics:
7594 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7595 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7599 if self.op.iallocator is not None:
7600 self._RunAllocator()
7602 #### node related checks
7604 # check primary node
7605 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7606 assert self.pnode is not None, \
7607 "Cannot retrieve locked node %s" % self.op.pnode
7608 if pnode.offline:
7609 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7610 pnode.name, errors.ECODE_STATE)
7611 if pnode.drained:
7612 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7613 pnode.name, errors.ECODE_STATE)
7614 if not pnode.vm_capable:
7615 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7616 " '%s'" % pnode.name, errors.ECODE_STATE)
7618 self.secondaries = []
7620 # mirror node verification
7621 if self.op.disk_template in constants.DTS_NET_MIRROR:
7622 if self.op.snode == pnode.name:
7623 raise errors.OpPrereqError("The secondary node cannot be the"
7624 " primary node.", errors.ECODE_INVAL)
7625 _CheckNodeOnline(self, self.op.snode)
7626 _CheckNodeNotDrained(self, self.op.snode)
7627 _CheckNodeVmCapable(self, self.op.snode)
7628 self.secondaries.append(self.op.snode)
7630 nodenames = [pnode.name] + self.secondaries
7632 if not self.adopt_disks:
7633 # Check lv size requirements, if not adopting
7634 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7635 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7637 else: # instead, we must check the adoption data
7638 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7639 if len(all_lvs) != len(self.disks):
7640 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7641 errors.ECODE_INVAL)
7642 for lv_name in all_lvs:
7643 try:
7644 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
7645 # to ReserveLV use the same syntax
7646 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7647 except errors.ReservationError:
7648 raise errors.OpPrereqError("LV named %s used by another instance" %
7649 lv_name, errors.ECODE_NOTUNIQUE)
7651 vg_names = self.rpc.call_vg_list([pnode.name])
7652 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7654 node_lvs = self.rpc.call_lv_list([pnode.name],
7655 vg_names[pnode.name].payload.keys()
7656 )[pnode.name]
7657 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7658 node_lvs = node_lvs.payload
7660 delta = all_lvs.difference(node_lvs.keys())
7661 if delta:
7662 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7663 utils.CommaJoin(delta),
7664 errors.ECODE_INVAL)
7665 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7666 if online_lvs:
7667 raise errors.OpPrereqError("Online logical volumes found, cannot"
7668 " adopt: %s" % utils.CommaJoin(online_lvs),
7669 errors.ECODE_STATE)
7670 # update the size of each disk based on what was found on the node
7671 for dsk in self.disks:
7672 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
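# Sketch of the adoption data (assumed payload layout): the lv_list RPC
# returns a dict keyed by "vg/lv_name" whose values are tuples of
# (size, inactive, online), which is why node_lvs[...][0] is used above as
# the size and [...][2] as the online flag, e.g.
#   {"xenvg/mydata": (10240.0, False, False)}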
7674 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7676 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7677 # check OS parameters (remotely)
7678 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7680 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7682 # memory check on primary node
7684 _CheckNodeFreeMemory(self, self.pnode.name,
7685 "creating instance %s" % self.op.instance_name,
7686 self.be_full[constants.BE_MEMORY],
7687 self.op.hypervisor)
7689 self.dry_run_result = list(nodenames)
7691 def Exec(self, feedback_fn):
7692 """Create and add the instance to the cluster.
7695 instance = self.op.instance_name
7696 pnode_name = self.pnode.name
7698 ht_kind = self.op.hypervisor
7699 if ht_kind in constants.HTS_REQ_PORT:
7700 network_port = self.cfg.AllocatePort()
7701 else:
7702 network_port = None
7704 if constants.ENABLE_FILE_STORAGE:
7705 # this is needed because os.path.join does not accept None arguments
7706 if self.op.file_storage_dir is None:
7707 string_file_storage_dir = ""
7708 else:
7709 string_file_storage_dir = self.op.file_storage_dir
7711 # build the full file storage dir path
7712 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7713 string_file_storage_dir, instance)
7714 else:
7715 file_storage_dir = ""
7717 disks = _GenerateDiskTemplate(self,
7718 self.op.disk_template,
7719 instance, pnode_name,
7720 self.secondaries,
7721 self.disks,
7722 file_storage_dir,
7723 self.op.file_driver,
7724 0,
7725 feedback_fn)
7727 iobj = objects.Instance(name=instance, os=self.op.os_type,
7728 primary_node=pnode_name,
7729 nics=self.nics, disks=disks,
7730 disk_template=self.op.disk_template,
7731 admin_up=False,
7732 network_port=network_port,
7733 beparams=self.op.beparams,
7734 hvparams=self.op.hvparams,
7735 hypervisor=self.op.hypervisor,
7736 osparams=self.op.osparams,
7737 )
7739 if self.adopt_disks:
7740 # rename LVs to the newly-generated names; we need to construct
7741 # 'fake' LV disks with the old data, plus the new unique_id
7742 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7743 rename_to = []
7744 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7745 rename_to.append(t_dsk.logical_id)
7746 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7747 self.cfg.SetDiskID(t_dsk, pnode_name)
7748 result = self.rpc.call_blockdev_rename(pnode_name,
7749 zip(tmp_disks, rename_to))
7750 result.Raise("Failed to rename adopted LVs")
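# Illustrative example (hypothetical names): for an adopted volume
# "xenvg/mydata" and a generated logical_id ("xenvg", "<uuid>.disk0"),
# the rename above renames the existing on-node LV "mydata" to the
# Ganeti-generated name, so from this point on the adopted storage is
# indistinguishable from a freshly created instance disk.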
7752 feedback_fn("* creating instance disks...")
7753 try:
7754 _CreateDisks(self, iobj)
7755 except errors.OpExecError:
7756 self.LogWarning("Device creation failed, reverting...")
7757 try:
7758 _RemoveDisks(self, iobj)
7759 finally:
7760 self.cfg.ReleaseDRBDMinors(instance)
7761 raise
7763 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7764 feedback_fn("* wiping instance disks...")
7765 try:
7766 _WipeDisks(self, iobj)
7767 except errors.OpExecError:
7768 self.LogWarning("Device wiping failed, reverting...")
7769 try:
7770 _RemoveDisks(self, iobj)
7771 finally:
7772 self.cfg.ReleaseDRBDMinors(instance)
7773 raise
7775 feedback_fn("adding instance %s to cluster config" % instance)
7777 self.cfg.AddInstance(iobj, self.proc.GetECId())
7779 # Declare that we don't want to remove the instance lock anymore, as we've
7780 # added the instance to the config
7781 del self.remove_locks[locking.LEVEL_INSTANCE]
7782 # Unlock all the nodes
7783 if self.op.mode == constants.INSTANCE_IMPORT:
7784 nodes_keep = [self.op.src_node]
7785 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7786 if node != self.op.src_node]
7787 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7788 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7789 else:
7790 self.context.glm.release(locking.LEVEL_NODE)
7791 del self.acquired_locks[locking.LEVEL_NODE]
7793 if self.op.wait_for_sync:
7794 disk_abort = not _WaitForSync(self, iobj)
7795 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7796 # make sure the disks are not degraded (still sync-ing is ok)
7797 time.sleep(15)
7798 feedback_fn("* checking mirrors status")
7799 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7800 else:
7801 disk_abort = False
7803 if disk_abort:
7804 _RemoveDisks(self, iobj)
7805 self.cfg.RemoveInstance(iobj.name)
7806 # Make sure the instance lock gets removed
7807 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7808 raise errors.OpExecError("There are some degraded disks for"
7809 " this instance")
7811 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7812 if self.op.mode == constants.INSTANCE_CREATE:
7813 if not self.op.no_install:
7814 feedback_fn("* running the instance OS create scripts...")
7815 # FIXME: pass debug option from opcode to backend
7816 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7817 self.op.debug_level)
7818 result.Raise("Could not add os for instance %s"
7819 " on node %s" % (instance, pnode_name))
7821 elif self.op.mode == constants.INSTANCE_IMPORT:
7822 feedback_fn("* running the instance OS import scripts...")
7824 transfers = []
7826 for idx, image in enumerate(self.src_images):
7827 if not image:
7828 continue
7830 # FIXME: pass debug option from opcode to backend
7831 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7832 constants.IEIO_FILE, (image, ),
7833 constants.IEIO_SCRIPT,
7834 (iobj.disks[idx], idx),
7835 None)
7836 transfers.append(dt)
7838 import_result = \
7839 masterd.instance.TransferInstanceData(self, feedback_fn,
7840 self.op.src_node, pnode_name,
7841 self.pnode.secondary_ip,
7842 iobj, transfers)
7843 if not compat.all(import_result):
7844 self.LogWarning("Some disks for instance %s on node %s were not"
7845 " imported successfully" % (instance, pnode_name))
7847 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7848 feedback_fn("* preparing remote import...")
7849 # The source cluster will stop the instance before attempting to make a
7850 # connection. In some cases stopping an instance can take a long time,
7851 # hence the shutdown timeout is added to the connection timeout.
7852 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7853 self.op.source_shutdown_timeout)
7854 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7856 assert iobj.primary_node == self.pnode.name
7857 disk_results = \
7858 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7859 self.source_x509_ca,
7860 self._cds, timeouts)
7861 if not compat.all(disk_results):
7862 # TODO: Should the instance still be started, even if some disks
7863 # failed to import (valid for local imports, too)?
7864 self.LogWarning("Some disks for instance %s on node %s were not"
7865 " imported successfully" % (instance, pnode_name))
7867 # Run rename script on newly imported instance
7868 assert iobj.name == instance
7869 feedback_fn("Running rename script for %s" % instance)
7870 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7871 self.source_instance_name,
7872 self.op.debug_level)
7873 if result.fail_msg:
7874 self.LogWarning("Failed to run rename script for %s on node"
7875 " %s: %s" % (instance, pnode_name, result.fail_msg))
7877 else:
7878 # also checked in the prereq part
7879 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7880 % self.op.mode)
7882 if self.op.start:
7883 iobj.admin_up = True
7884 self.cfg.Update(iobj, feedback_fn)
7885 logging.info("Starting instance %s on node %s", instance, pnode_name)
7886 feedback_fn("* starting instance...")
7887 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7888 result.Raise("Could not start instance")
7890 return list(iobj.all_nodes)
7893 class LUConnectConsole(NoHooksLU):
7894 """Connect to an instance's console.
7896 This is somewhat special in that it returns the command line that
7897 you need to run on the master node in order to connect to the
7898 console.
7900 """
7906 def ExpandNames(self):
7907 self._ExpandAndLockInstance()
7909 def CheckPrereq(self):
7910 """Check prerequisites.
7912 This checks that the instance is in the cluster.
7914 """
7915 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7916 assert self.instance is not None, \
7917 "Cannot retrieve locked instance %s" % self.op.instance_name
7918 _CheckNodeOnline(self, self.instance.primary_node)
7920 def Exec(self, feedback_fn):
7921 """Connect to the console of an instance
7924 instance = self.instance
7925 node = instance.primary_node
7927 node_insts = self.rpc.call_instance_list([node],
7928 [instance.hypervisor])[node]
7929 node_insts.Raise("Can't get node information from %s" % node)
7931 if instance.name not in node_insts.payload:
7932 if instance.admin_up:
7933 state = "ERROR_down"
7934 else:
7935 state = "ADMIN_down"
7936 raise errors.OpExecError("Instance %s is not running (state %s)" %
7937 (instance.name, state))
7939 logging.debug("Connecting to console of %s on %s", instance.name, node)
7941 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7942 cluster = self.cfg.GetClusterInfo()
7943 # beparams and hvparams are passed separately, to avoid editing the
7944 # instance and then saving the defaults in the instance itself.
7945 hvparams = cluster.FillHV(instance)
7946 beparams = cluster.FillBE(instance)
7947 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7950 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
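# Illustrative example (hypothetical hostnames; the exact console command
# depends on the hypervisor): for a Xen instance the returned value is an
# SSH command line built by BuildCmd, roughly equivalent to
#   ssh -t root@node1.example.com 'xm console inst1.example.com'
# which the caller is expected to execute on the master node.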
7953 class LUReplaceDisks(LogicalUnit):
7954 """Replace the disks of an instance.
7957 HPATH = "mirrors-replace"
7958 HTYPE = constants.HTYPE_INSTANCE
7961 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7962 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7963 ("remote_node", None, ht.TMaybeString),
7964 ("iallocator", None, ht.TMaybeString),
7965 ("early_release", False, ht.TBool),
7969 def CheckArguments(self):
7970 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7971 self.op.iallocator)
7973 def ExpandNames(self):
7974 self._ExpandAndLockInstance()
7976 if self.op.iallocator is not None:
7977 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7979 elif self.op.remote_node is not None:
7980 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7981 self.op.remote_node = remote_node
7983 # Warning: do not remove the locking of the new secondary here
7984 # unless DRBD8.AddChildren is changed to work in parallel;
7985 # currently it doesn't since parallel invocations of
7986 # FindUnusedMinor will conflict
7987 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7988 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7990 else:
7991 self.needed_locks[locking.LEVEL_NODE] = []
7992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7994 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7995 self.op.iallocator, self.op.remote_node,
7996 self.op.disks, False, self.op.early_release)
7998 self.tasklets = [self.replacer]
8000 def DeclareLocks(self, level):
8001 # If we're not already locking all nodes in the set we have to declare the
8002 # instance's primary/secondary nodes.
8003 if (level == locking.LEVEL_NODE and
8004 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8005 self._LockInstancesNodes()
8007 def BuildHooksEnv(self):
8008 """Build hooks env.
8010 This runs on the master, the primary and all the secondaries.
8012 """
8013 instance = self.replacer.instance
8014 env = {
8015 "MODE": self.op.mode,
8016 "NEW_SECONDARY": self.op.remote_node,
8017 "OLD_SECONDARY": instance.secondary_nodes[0],
8018 }
8019 env.update(_BuildInstanceHookEnvByObject(self, instance))
8020 nl = [
8021 self.cfg.GetMasterNode(),
8022 instance.primary_node,
8023 ]
8024 if self.op.remote_node is not None:
8025 nl.append(self.op.remote_node)
8026 return env, nl, nl
8029 class TLReplaceDisks(Tasklet):
8030 """Replaces disks for an instance.
8032 Note: Locking is not within the scope of this class.
8034 """
8035 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8036 disks, delay_iallocator, early_release):
8037 """Initializes this class.
8040 Tasklet.__init__(self, lu)
8043 self.instance_name = instance_name
8045 self.iallocator_name = iallocator_name
8046 self.remote_node = remote_node
8048 self.delay_iallocator = delay_iallocator
8049 self.early_release = early_release
8052 self.instance = None
8053 self.new_node = None
8054 self.target_node = None
8055 self.other_node = None
8056 self.remote_node_info = None
8057 self.node_secondary_ip = None
8059 @staticmethod
8060 def CheckArguments(mode, remote_node, iallocator):
8061 """Helper function for users of this class.
8063 """
8064 # check for valid parameter combination
8065 if mode == constants.REPLACE_DISK_CHG:
8066 if remote_node is None and iallocator is None:
8067 raise errors.OpPrereqError("When changing the secondary either an"
8068 " iallocator script must be used or the"
8069 " new node given", errors.ECODE_INVAL)
8071 if remote_node is not None and iallocator is not None:
8072 raise errors.OpPrereqError("Give either the iallocator or the new"
8073 " secondary, not both", errors.ECODE_INVAL)
8075 elif remote_node is not None or iallocator is not None:
8076 # Not replacing the secondary
8077 raise errors.OpPrereqError("The iallocator and new node options can"
8078 " only be used when changing the"
8079 " secondary node", errors.ECODE_INVAL)
8081 @staticmethod
8082 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8083 """Compute a new secondary node using an IAllocator.
8085 """
8086 ial = IAllocator(lu.cfg, lu.rpc,
8087 mode=constants.IALLOCATOR_MODE_RELOC,
8088 name=instance_name,
8089 relocate_from=relocate_from)
8091 ial.Run(iallocator_name)
8093 if not ial.success:
8094 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8095 " %s" % (iallocator_name, ial.info),
8096 errors.ECODE_NORES)
8098 if len(ial.result) != ial.required_nodes:
8099 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8100 " of nodes (%s), required %s" %
8102 len(ial.result), ial.required_nodes),
8105 remote_node_name = ial.result[0]
8107 lu.LogInfo("Selected new secondary for instance '%s': %s",
8108 instance_name, remote_node_name)
8110 return remote_node_name
8112 def _FindFaultyDisks(self, node_name):
8113 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8114 node_name, True)
8116 def CheckPrereq(self):
8117 """Check prerequisites.
8119 This checks that the instance is in the cluster.
8121 """
8122 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8123 assert instance is not None, \
8124 "Cannot retrieve locked instance %s" % self.instance_name
8126 if instance.disk_template != constants.DT_DRBD8:
8127 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8128 " instances", errors.ECODE_INVAL)
8130 if len(instance.secondary_nodes) != 1:
8131 raise errors.OpPrereqError("The instance has a strange layout,"
8132 " expected one secondary but found %d" %
8133 len(instance.secondary_nodes),
8134 errors.ECODE_FAULT)
8136 if not self.delay_iallocator:
8137 self._CheckPrereq2()
8139 def _CheckPrereq2(self):
8140 """Check prerequisites, second part.
8142 This function should always be part of CheckPrereq. It was separated and is
8143 now called from Exec because during node evacuation iallocator was only
8144 called with an unmodified cluster model, not taking planned changes into
8145 account.
8147 """
8148 instance = self.instance
8149 secondary_node = instance.secondary_nodes[0]
8151 if self.iallocator_name is None:
8152 remote_node = self.remote_node
8153 else:
8154 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8155 instance.name, instance.secondary_nodes)
8157 if remote_node is not None:
8158 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8159 assert self.remote_node_info is not None, \
8160 "Cannot retrieve locked node %s" % remote_node
8162 self.remote_node_info = None
8164 if remote_node == self.instance.primary_node:
8165 raise errors.OpPrereqError("The specified node is the primary node of"
8166 " the instance.", errors.ECODE_INVAL)
8168 if remote_node == secondary_node:
8169 raise errors.OpPrereqError("The specified node is already the"
8170 " secondary node of the instance.",
8173 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8174 constants.REPLACE_DISK_CHG):
8175 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8176 errors.ECODE_INVAL)
8178 if self.mode == constants.REPLACE_DISK_AUTO:
8179 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8180 faulty_secondary = self._FindFaultyDisks(secondary_node)
8182 if faulty_primary and faulty_secondary:
8183 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8184 " one node and can not be repaired"
8185 " automatically" % self.instance_name,
8189 self.disks = faulty_primary
8190 self.target_node = instance.primary_node
8191 self.other_node = secondary_node
8192 check_nodes = [self.target_node, self.other_node]
8193 elif faulty_secondary:
8194 self.disks = faulty_secondary
8195 self.target_node = secondary_node
8196 self.other_node = instance.primary_node
8197 check_nodes = [self.target_node, self.other_node]
8198 else:
8199 self.disks = []
8200 check_nodes = []
8202 else:
8203 # Non-automatic modes
8204 if self.mode == constants.REPLACE_DISK_PRI:
8205 self.target_node = instance.primary_node
8206 self.other_node = secondary_node
8207 check_nodes = [self.target_node, self.other_node]
8209 elif self.mode == constants.REPLACE_DISK_SEC:
8210 self.target_node = secondary_node
8211 self.other_node = instance.primary_node
8212 check_nodes = [self.target_node, self.other_node]
8214 elif self.mode == constants.REPLACE_DISK_CHG:
8215 self.new_node = remote_node
8216 self.other_node = instance.primary_node
8217 self.target_node = secondary_node
8218 check_nodes = [self.new_node, self.other_node]
8220 _CheckNodeNotDrained(self.lu, remote_node)
8221 _CheckNodeVmCapable(self.lu, remote_node)
8223 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8224 assert old_node_info is not None
8225 if old_node_info.offline and not self.early_release:
8226 # doesn't make sense to delay the release
8227 self.early_release = True
8228 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8229 " early-release mode", secondary_node)
8232 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8235 # If not specified all disks should be replaced
8236 if not self.disks:
8237 self.disks = range(len(self.instance.disks))
8239 for node in check_nodes:
8240 _CheckNodeOnline(self.lu, node)
8242 # Check whether disks are valid
8243 for disk_idx in self.disks:
8244 instance.FindDisk(disk_idx)
8246 # Get secondary node IP addresses
8247 node_2nd_ip = {}
8249 for node_name in [self.target_node, self.other_node, self.new_node]:
8250 if node_name is not None:
8251 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8253 self.node_secondary_ip = node_2nd_ip
8255 def Exec(self, feedback_fn):
8256 """Execute disk replacement.
8258 This dispatches the disk replacement to the appropriate handler.
8260 """
8261 if self.delay_iallocator:
8262 self._CheckPrereq2()
8264 if not self.disks:
8265 feedback_fn("No disks need replacement")
8266 return
8268 feedback_fn("Replacing disk(s) %s for %s" %
8269 (utils.CommaJoin(self.disks), self.instance.name))
8271 activate_disks = (not self.instance.admin_up)
8273 # Activate the instance disks if we're replacing them on a down instance
8274 if activate_disks:
8275 _StartInstanceDisks(self.lu, self.instance, True)
8277 try:
8278 # Should we replace the secondary node?
8279 if self.new_node is not None:
8280 fn = self._ExecDrbd8Secondary
8281 else:
8282 fn = self._ExecDrbd8DiskOnly
8284 return fn(feedback_fn)
8285 finally:
8287 # Deactivate the instance disks if we're replacing them on a
8288 # down instance
8289 if activate_disks:
8290 _SafeShutdownInstanceDisks(self.lu, self.instance)
8292 def _CheckVolumeGroup(self, nodes):
8293 self.lu.LogInfo("Checking volume groups")
8295 vgname = self.cfg.GetVGName()
8297 # Make sure volume group exists on all involved nodes
8298 results = self.rpc.call_vg_list(nodes)
8299 if not results:
8300 raise errors.OpExecError("Can't list volume groups on the nodes")
8302 for node in nodes:
8303 res = results[node]
8304 res.Raise("Error checking node %s" % node)
8305 if vgname not in res.payload:
8306 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8307 (vgname, node))
8309 def _CheckDisksExistence(self, nodes):
8310 # Check disk existence
8311 for idx, dev in enumerate(self.instance.disks):
8312 if idx not in self.disks:
8313 continue
8315 for node in nodes:
8316 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8317 self.cfg.SetDiskID(dev, node)
8319 result = self.rpc.call_blockdev_find(node, dev)
8321 msg = result.fail_msg
8322 if msg or not result.payload:
8323 if not msg:
8324 msg = "disk not found"
8325 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8326 (idx, node, msg))
8328 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8329 for idx, dev in enumerate(self.instance.disks):
8330 if idx not in self.disks:
8331 continue
8333 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8334 (idx, node_name))
8336 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8337 ldisk=ldisk):
8338 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8339 " replace disks for instance %s" %
8340 (node_name, self.instance.name))
8342 def _CreateNewStorage(self, node_name):
8343 vgname = self.cfg.GetVGName()
8345 iv_names = {}
8346 for idx, dev in enumerate(self.instance.disks):
8347 if idx not in self.disks:
8348 continue
8350 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8352 self.cfg.SetDiskID(dev, node_name)
8354 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8355 names = _GenerateUniqueNames(self.lu, lv_names)
8357 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8358 logical_id=(vgname, names[0]))
8359 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8360 logical_id=(vgname, names[1]))
8362 new_lvs = [lv_data, lv_meta]
8363 old_lvs = dev.children
8364 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8366 # we pass force_create=True to force the LVM creation
8367 for new_lv in new_lvs:
8368 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8369 _GetInstanceInfoText(self.instance), False)
8371 return iv_names
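# Shape note (illustrative): the returned iv_names maps each DRBD device's
# iv_name to a (drbd_dev, old_lvs, new_lvs) triple, e.g.
#   {"disk/0": (<DRBD8 disk>, [old data LV, old meta LV],
#               [new data LV, new meta LV])}
# and later drives the detach/rename/attach sequence as well as the final
# removal of the old storage.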
8373 def _CheckDevices(self, node_name, iv_names):
8374 for name, (dev, _, _) in iv_names.iteritems():
8375 self.cfg.SetDiskID(dev, node_name)
8377 result = self.rpc.call_blockdev_find(node_name, dev)
8379 msg = result.fail_msg
8380 if msg or not result.payload:
8381 if not msg:
8382 msg = "disk not found"
8383 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8384 (name, msg))
8386 if result.payload.is_degraded:
8387 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8389 def _RemoveOldStorage(self, node_name, iv_names):
8390 for name, (_, old_lvs, _) in iv_names.iteritems():
8391 self.lu.LogInfo("Remove logical volumes for %s" % name)
8394 self.cfg.SetDiskID(lv, node_name)
8396 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8398 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8399 hint="remove unused LVs manually")
8401 def _ReleaseNodeLock(self, node_name):
8402 """Releases the lock for a given node."""
8403 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8405 def _ExecDrbd8DiskOnly(self, feedback_fn):
8406 """Replace a disk on the primary or secondary for DRBD 8.
8408 The algorithm for replace is quite complicated:
8410 1. for each disk to be replaced:
8412 1. create new LVs on the target node with unique names
8413 1. detach old LVs from the drbd device
8414 1. rename old LVs to name_replaced.<time_t>
8415 1. rename new LVs to old LVs
8416 1. attach the new LVs (with the old names now) to the drbd device
8418 1. wait for sync across all devices
8420 1. for each modified disk:
8422 1. remove old LVs (which have the name name_replaced.<time_t>)
8424 Failures are not very well handled.
8426 """
8427 steps_total = 6
8429 # Step: check device activation
8430 self.lu.LogStep(1, steps_total, "Check device existence")
8431 self._CheckDisksExistence([self.other_node, self.target_node])
8432 self._CheckVolumeGroup([self.target_node, self.other_node])
8434 # Step: check other node consistency
8435 self.lu.LogStep(2, steps_total, "Check peer consistency")
8436 self._CheckDisksConsistency(self.other_node,
8437 self.other_node == self.instance.primary_node,
8438 False)
8440 # Step: create new storage
8441 self.lu.LogStep(3, steps_total, "Allocate new storage")
8442 iv_names = self._CreateNewStorage(self.target_node)
8444 # Step: for each lv, detach+rename*2+attach
8445 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8446 for dev, old_lvs, new_lvs in iv_names.itervalues():
8447 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8449 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8450 old_lvs)
8451 result.Raise("Can't detach drbd from local storage on node"
8452 " %s for device %s" % (self.target_node, dev.iv_name))
8454 #cfg.Update(instance)
8456 # ok, we created the new LVs, so now we know we have the needed
8457 # storage; as such, we proceed on the target node to rename
8458 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8459 # using the assumption that logical_id == physical_id (which in
8460 # turn is the unique_id on that node)
8462 # FIXME(iustin): use a better name for the replaced LVs
8463 temp_suffix = int(time.time())
8464 ren_fn = lambda d, suff: (d.physical_id[0],
8465 d.physical_id[1] + "_replaced-%s" % suff)
8467 # Build the rename list based on what LVs exist on the node
8468 rename_old_to_new = []
8469 for to_ren in old_lvs:
8470 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8471 if not result.fail_msg and result.payload:
8473 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8475 self.lu.LogInfo("Renaming the old LVs on the target node")
8476 result = self.rpc.call_blockdev_rename(self.target_node,
8477 rename_old_to_new)
8478 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8480 # Now we rename the new LVs to the old LVs
8481 self.lu.LogInfo("Renaming the new LVs on the target node")
8482 rename_new_to_old = [(new, old.physical_id)
8483 for old, new in zip(old_lvs, new_lvs)]
8484 result = self.rpc.call_blockdev_rename(self.target_node,
8485 rename_new_to_old)
8486 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8488 for old, new in zip(old_lvs, new_lvs):
8489 new.logical_id = old.logical_id
8490 self.cfg.SetDiskID(new, self.target_node)
8492 for disk in old_lvs:
8493 disk.logical_id = ren_fn(disk, temp_suffix)
8494 self.cfg.SetDiskID(disk, self.target_node)
8496 # Now that the new lvs have the old name, we can add them to the device
8497 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8498 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8499 new_lvs)
8500 msg = result.fail_msg
8501 if msg:
8502 for new_lv in new_lvs:
8503 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8504 new_lv).fail_msg
8505 if msg2:
8506 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8507 hint=("cleanup manually the unused logical"
8508 " volumes"))
8509 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8511 dev.children = new_lvs
8513 self.cfg.Update(self.instance, feedback_fn)
8515 cstep = 5
8516 if self.early_release:
8517 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8518 cstep += 1
8519 self._RemoveOldStorage(self.target_node, iv_names)
8520 # WARNING: we release both node locks here, do not do other RPCs
8521 # than WaitForSync to the primary node
8522 self._ReleaseNodeLock([self.target_node, self.other_node])
8525 # This can fail as the old devices are degraded and _WaitForSync
8526 # does a combined result over all disks, so we don't check its return value
8527 self.lu.LogStep(cstep, steps_total, "Sync devices")
8528 cstep += 1
8529 _WaitForSync(self.lu, self.instance)
8531 # Check all devices manually
8532 self._CheckDevices(self.instance.primary_node, iv_names)
8534 # Step: remove old storage
8535 if not self.early_release:
8536 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8537 cstep += 1
8538 self._RemoveOldStorage(self.target_node, iv_names)
8540 def _ExecDrbd8Secondary(self, feedback_fn):
8541 """Replace the secondary node for DRBD 8.
8543 The algorithm for replace is quite complicated:
8544 - for all disks of the instance:
8545 - create new LVs on the new node with same names
8546 - shutdown the drbd device on the old secondary
8547 - disconnect the drbd network on the primary
8548 - create the drbd device on the new secondary
8549 - network attach the drbd on the primary, using an artifice:
8550 the drbd code for Attach() will connect to the network if it
8551 finds a device which is connected to the good local disks but
8552 not network enabled
8553 - wait for sync across all devices
8554 - remove all disks from the old secondary
8556 Failures are not very well handled.
8558 """
8559 steps_total = 6
8561 # Step: check device activation
8562 self.lu.LogStep(1, steps_total, "Check device existence")
8563 self._CheckDisksExistence([self.instance.primary_node])
8564 self._CheckVolumeGroup([self.instance.primary_node])
8566 # Step: check other node consistency
8567 self.lu.LogStep(2, steps_total, "Check peer consistency")
8568 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8570 # Step: create new storage
8571 self.lu.LogStep(3, steps_total, "Allocate new storage")
8572 for idx, dev in enumerate(self.instance.disks):
8573 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8574 (self.new_node, idx))
8575 # we pass force_create=True to force LVM creation
8576 for new_lv in dev.children:
8577 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8578 _GetInstanceInfoText(self.instance), False)
8580 # Step 4: drbd minors and drbd setup changes
8581 # after this, we must manually remove the drbd minors on both the
8582 # error and the success paths
8583 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8584 minors = self.cfg.AllocateDRBDMinor([self.new_node
8585 for dev in self.instance.disks],
8586 self.instance.name)
8587 logging.debug("Allocated minors %r", minors)
8589 iv_names = {}
8590 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8591 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8592 (self.new_node, idx))
8593 # create new devices on new_node; note that we create two IDs:
8594 # one without port, so the drbd will be activated without
8595 # networking information on the new node at this stage, and one
8596 # with network, for the latter activation in step 4
8597 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8598 if self.instance.primary_node == o_node1:
8599 p_minor = o_minor1
8600 else:
8601 assert self.instance.primary_node == o_node2, "Three-node instance?"
8602 p_minor = o_minor2
8604 new_alone_id = (self.instance.primary_node, self.new_node, None,
8605 p_minor, new_minor, o_secret)
8606 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8607 p_minor, new_minor, o_secret)
8609 iv_names[idx] = (dev, dev.children, new_net_id)
8610 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8612 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8613 logical_id=new_alone_id,
8614 children=dev.children,
8617 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8618 _GetInstanceInfoText(self.instance), False)
8619 except errors.GenericError:
8620 self.cfg.ReleaseDRBDMinors(self.instance.name)
8623 # We have new devices, shutdown the drbd on the old secondary
8624 for idx, dev in enumerate(self.instance.disks):
8625 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8626 self.cfg.SetDiskID(dev, self.target_node)
8627 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8628 if msg:
8629 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8630 " node: %s" % (idx, msg),
8631 hint=("Please cleanup this device manually as"
8632 " soon as possible"))
8634 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8635 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8636 self.node_secondary_ip,
8637 self.instance.disks)\
8638 [self.instance.primary_node]
8640 msg = result.fail_msg
8641 if msg:
8642 # detaches didn't succeed (unlikely)
8643 self.cfg.ReleaseDRBDMinors(self.instance.name)
8644 raise errors.OpExecError("Can't detach the disks from the network on"
8645 " old node: %s" % (msg,))
8647 # if we managed to detach at least one, we update all the disks of
8648 # the instance to point to the new secondary
8649 self.lu.LogInfo("Updating instance configuration")
8650 for dev, _, new_logical_id in iv_names.itervalues():
8651 dev.logical_id = new_logical_id
8652 self.cfg.SetDiskID(dev, self.instance.primary_node)
8654 self.cfg.Update(self.instance, feedback_fn)
8656 # and now perform the drbd attach
8657 self.lu.LogInfo("Attaching primary drbds to new secondary"
8658 " (standalone => connected)")
8659 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8660 self.new_node],
8661 self.node_secondary_ip,
8662 self.instance.disks,
8663 self.instance.name,
8664 False)
8665 for to_node, to_result in result.items():
8666 msg = to_result.fail_msg
8667 if msg:
8668 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8669 to_node, msg,
8670 hint=("please do a gnt-instance info to see the"
8671 " status of disks"))
8672 cstep = 5
8673 if self.early_release:
8674 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8675 cstep += 1
8676 self._RemoveOldStorage(self.target_node, iv_names)
8677 # WARNING: we release all node locks here, do not do other RPCs
8678 # than WaitForSync to the primary node
8679 self._ReleaseNodeLock([self.instance.primary_node,
8680 self.target_node,
8681 self.new_node])
8684 # This can fail as the old devices are degraded and _WaitForSync
8685 # does a combined result over all disks, so we don't check its return value
8686 self.lu.LogStep(cstep, steps_total, "Sync devices")
8687 cstep += 1
8688 _WaitForSync(self.lu, self.instance)
8690 # Check all devices manually
8691 self._CheckDevices(self.instance.primary_node, iv_names)
8693 # Step: remove old storage
8694 if not self.early_release:
8695 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8696 self._RemoveOldStorage(self.target_node, iv_names)
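# CLI sketch (assuming the standard gnt-instance front-end): this tasklet
# backs invocations such as
#   gnt-instance replace-disks -p inst1        # replace on the primary
#   gnt-instance replace-disks -s inst1        # replace on the secondary
#   gnt-instance replace-disks -n node3 inst1  # change the secondary node
#   gnt-instance replace-disks -I hail inst1   # let an iallocator choose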
8699 class LURepairNodeStorage(NoHooksLU):
8700 """Repairs the volume group on a node.
8705 ("storage_type", ht.NoDefault, _CheckStorageType),
8706 ("name", ht.NoDefault, ht.TNonEmptyString),
8707 ("ignore_consistency", False, ht.TBool),
8711 def CheckArguments(self):
8712 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8714 storage_type = self.op.storage_type
8716 if (constants.SO_FIX_CONSISTENCY not in
8717 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8718 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8719 " repaired" % storage_type,
8722 def ExpandNames(self):
8723 self.needed_locks = {
8724 locking.LEVEL_NODE: [self.op.node_name],
8725 }
8727 def _CheckFaultyDisks(self, instance, node_name):
8728 """Ensure faulty disks abort the opcode or at least warn."""
8730 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8732 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8733 " node '%s'" % (instance.name, node_name),
8735 except errors.OpPrereqError, err:
8736 if self.op.ignore_consistency:
8737 self.proc.LogWarning(str(err.args[0]))
8741 def CheckPrereq(self):
8742 """Check prerequisites.
8745 # Check whether any instance on this node has faulty disks
8746 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8747 if not inst.admin_up:
8748 continue
8749 check_nodes = set(inst.all_nodes)
8750 check_nodes.discard(self.op.node_name)
8751 for inst_node_name in check_nodes:
8752 self._CheckFaultyDisks(inst, inst_node_name)
8754 def Exec(self, feedback_fn):
8755 feedback_fn("Repairing storage unit '%s' on %s ..." %
8756 (self.op.name, self.op.node_name))
8758 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8759 result = self.rpc.call_storage_execute(self.op.node_name,
8760 self.op.storage_type, st_args,
8761 self.op.name,
8762 constants.SO_FIX_CONSISTENCY)
8763 result.Raise("Failed to repair storage unit '%s' on %s" %
8764 (self.op.name, self.op.node_name))
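# CLI sketch (assuming the standard gnt-node front-end): this LU backs e.g.
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# and only storage types that list SO_FIX_CONSISTENCY among their valid
# operations can be repaired, as enforced in CheckArguments above.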
8767 class LUNodeEvacuationStrategy(NoHooksLU):
8768 """Computes the node evacuation strategy.
8772 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8773 ("remote_node", None, ht.TMaybeString),
8774 ("iallocator", None, ht.TMaybeString),
8778 def CheckArguments(self):
8779 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8781 def ExpandNames(self):
8782 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8783 self.needed_locks = locks = {}
8784 if self.op.remote_node is None:
8785 locks[locking.LEVEL_NODE] = locking.ALL_SET
8786 else:
8787 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8788 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8790 def Exec(self, feedback_fn):
8791 if self.op.remote_node is not None:
8792 instances = []
8793 for node in self.op.nodes:
8794 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8795 result = []
8796 for i in instances:
8797 if i.primary_node == self.op.remote_node:
8798 raise errors.OpPrereqError("Node %s is the primary node of"
8799 " instance %s, cannot use it as"
8800 " secondary" %
8801 (self.op.remote_node, i.name),
8802 errors.ECODE_INVAL)
8803 result.append([i.name, self.op.remote_node])
8804 else:
8805 ial = IAllocator(self.cfg, self.rpc,
8806 mode=constants.IALLOCATOR_MODE_MEVAC,
8807 evac_nodes=self.op.nodes)
8808 ial.Run(self.op.iallocator, validate=True)
8809 if not ial.success:
8810 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8811 errors.ECODE_NORES)
8812 result = ial.result
8814 return result
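# Result shape (illustrative; the exact layout follows the iallocator
# protocol): the remote-node branch returns pairs such as
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
# while the iallocator branch passes through ial.result from the MEVAC run.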
8816 class LUGrowDisk(LogicalUnit):
8817 """Grow a disk of an instance.
8821 HTYPE = constants.HTYPE_INSTANCE
8824 ("disk", ht.NoDefault, ht.TInt),
8825 ("amount", ht.NoDefault, ht.TInt),
8826 ("wait_for_sync", True, ht.TBool),
8830 def ExpandNames(self):
8831 self._ExpandAndLockInstance()
8832 self.needed_locks[locking.LEVEL_NODE] = []
8833 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8835 def DeclareLocks(self, level):
8836 if level == locking.LEVEL_NODE:
8837 self._LockInstancesNodes()
8839 def BuildHooksEnv(self):
8840 """Build hooks env.
8842 This runs on the master, the primary and all the secondaries.
8844 """
8845 env = {
8846 "DISK": self.op.disk,
8847 "AMOUNT": self.op.amount,
8848 }
8849 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8850 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8851 return env, nl, nl
8853 def CheckPrereq(self):
8854 """Check prerequisites.
8856 This checks that the instance is in the cluster.
8858 """
8859 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8860 assert instance is not None, \
8861 "Cannot retrieve locked instance %s" % self.op.instance_name
8862 nodenames = list(instance.all_nodes)
8863 for node in nodenames:
8864 _CheckNodeOnline(self, node)
8866 self.instance = instance
8868 if instance.disk_template not in constants.DTS_GROWABLE:
8869 raise errors.OpPrereqError("Instance's disk layout does not support"
8870 " growing.", errors.ECODE_INVAL)
8872 self.disk = instance.FindDisk(self.op.disk)
8874 if instance.disk_template != constants.DT_FILE:
8875 # TODO: check the free disk space for file, when that feature
8876 # will be supported
8877 _CheckNodesFreeDiskPerVG(self, nodenames,
8878 {self.disk.physical_id[0]: self.op.amount})
8880 def Exec(self, feedback_fn):
8881 """Execute disk grow.
8884 instance = self.instance
8887 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8889 raise errors.OpExecError("Cannot activate block device to grow")
8891 for node in instance.all_nodes:
8892 self.cfg.SetDiskID(disk, node)
8893 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8894 result.Raise("Grow request failed to node %s" % node)
8896 # TODO: Rewrite code to work properly
8897 # DRBD goes into sync mode for a short amount of time after executing the
8898 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8899 # calling "resize" in sync mode fails. Sleeping for a short amount of
8900 # time is a work-around.
8901 time.sleep(5)
8903 disk.RecordGrow(self.op.amount)
8904 self.cfg.Update(instance, feedback_fn)
8905 if self.op.wait_for_sync:
8906 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8907 if disk_abort:
8908 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8909 " status.\nPlease check the instance.")
8910 if not instance.admin_up:
8911 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8912 elif not instance.admin_up:
8913 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8914 " not supposed to be running because no wait for"
8915 " sync mode was requested.")
8918 class LUQueryInstanceData(NoHooksLU):
8919 """Query runtime instance data.
8923 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8924 ("static", False, ht.TBool),
8928 def ExpandNames(self):
8929 self.needed_locks = {}
8930 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8932 if self.op.instances:
8933 self.wanted_names = []
8934 for name in self.op.instances:
8935 full_name = _ExpandInstanceName(self.cfg, name)
8936 self.wanted_names.append(full_name)
8937 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8938 else:
8939 self.wanted_names = None
8940 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8942 self.needed_locks[locking.LEVEL_NODE] = []
8943 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8945 def DeclareLocks(self, level):
8946 if level == locking.LEVEL_NODE:
8947 self._LockInstancesNodes()
8949 def CheckPrereq(self):
8950 """Check prerequisites.
8952 This only checks the optional instance list against the existing names.
8954 """
8955 if self.wanted_names is None:
8956 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8958 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8959 in self.wanted_names]
8961 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8962 """Returns the status of a block device
8965 if self.op.static or not node:
8968 self.cfg.SetDiskID(dev, node)
8970 result = self.rpc.call_blockdev_find(node, dev)
8974 result.Raise("Can't compute disk status for %s" % instance_name)
8976 status = result.payload
8980 return (status.dev_path, status.major, status.minor,
8981 status.sync_percent, status.estimated_time,
8982 status.is_degraded, status.ldisk_status)
8984 def _ComputeDiskStatus(self, instance, snode, dev):
8985 """Compute block device status.
8988 if dev.dev_type in constants.LDS_DRBD:
8989 # we change the snode then (otherwise we use the one passed in)
8990 if dev.logical_id[0] == instance.primary_node:
8991 snode = dev.logical_id[1]
8992 else:
8993 snode = dev.logical_id[0]
8995 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8996 instance.name, dev)
8997 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8999 if dev.children:
9000 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9001 for child in dev.children]
9002 else:
9003 dev_children = []
9006 "iv_name": dev.iv_name,
9007 "dev_type": dev.dev_type,
9008 "logical_id": dev.logical_id,
9009 "physical_id": dev.physical_id,
9010 "pstatus": dev_pstatus,
9011 "sstatus": dev_sstatus,
9012 "children": dev_children,
9019 def Exec(self, feedback_fn):
9020 """Gather and return data"""
9023 cluster = self.cfg.GetClusterInfo()
9025 for instance in self.wanted_instances:
9026 if not self.op.static:
9027 remote_info = self.rpc.call_instance_info(instance.primary_node,
9028 instance.name,
9029 instance.hypervisor)
9030 remote_info.Raise("Error checking node %s" % instance.primary_node)
9031 remote_info = remote_info.payload
9032 if remote_info and "state" in remote_info:
9033 remote_state = "up"
9034 else:
9035 remote_state = "down"
9036 else:
9037 remote_state = None
9038 if instance.admin_up:
9039 config_state = "up"
9040 else:
9041 config_state = "down"
9043 disks = [self._ComputeDiskStatus(instance, None, device)
9044 for device in instance.disks]
9047 "name": instance.name,
9048 "config_state": config_state,
9049 "run_state": remote_state,
9050 "pnode": instance.primary_node,
9051 "snodes": instance.secondary_nodes,
9053 # this happens to be the same format used for hooks
9054 "nics": _NICListToTuple(self, instance.nics),
9055 "disk_template": instance.disk_template,
9057 "hypervisor": instance.hypervisor,
9058 "network_port": instance.network_port,
9059 "hv_instance": instance.hvparams,
9060 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9061 "be_instance": instance.beparams,
9062 "be_actual": cluster.FillBE(instance),
9063 "os_instance": instance.osparams,
9064 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9065 "serial_no": instance.serial_no,
9066 "mtime": instance.mtime,
9067 "ctime": instance.ctime,
9068 "uuid": instance.uuid,
9071 result[instance.name] = idict
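# Shape note (illustrative, abridged): the returned mapping is keyed by
# instance name, e.g.
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": "up", "run_state": "up",
#                          "pnode": "node1.example.com", "disks": [...],
#                          "nics": [...], ...}}
# with run_state set to None when only static information was requested.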
9076 class LUSetInstanceParams(LogicalUnit):
9077 """Modifies an instances's parameters.
9080 HPATH = "instance-modify"
9081 HTYPE = constants.HTYPE_INSTANCE
9082 _OP_PARAMS = [
9083 _PInstanceName,
9084 ("nics", ht.EmptyList, ht.TList),
9085 ("disks", ht.EmptyList, ht.TList),
9086 ("beparams", ht.EmptyDict, ht.TDict),
9087 ("hvparams", ht.EmptyDict, ht.TDict),
9088 ("disk_template", None, ht.TMaybeString),
9089 ("remote_node", None, ht.TMaybeString),
9090 ("os_name", None, ht.TMaybeString),
9091 ("force_variant", False, ht.TBool),
9092 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9097 def CheckArguments(self):
9098 if not (self.op.nics or self.op.disks or self.op.disk_template or
9099 self.op.hvparams or self.op.beparams or self.op.os_name):
9100 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9102 if self.op.hvparams:
9103 _CheckGlobalHvParams(self.op.hvparams)
9105 # Disk validation
9106 disk_addremove = 0
9107 for disk_op, disk_dict in self.op.disks:
9108 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9109 if disk_op == constants.DDM_REMOVE:
9110 disk_addremove += 1
9111 continue
9112 elif disk_op == constants.DDM_ADD:
9113 disk_addremove += 1
9114 else:
9115 if not isinstance(disk_op, int):
9116 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9117 if not isinstance(disk_dict, dict):
9118 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9119 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9121 if disk_op == constants.DDM_ADD:
9122 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9123 if mode not in constants.DISK_ACCESS_SET:
9124 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9126 size = disk_dict.get('size', None)
9127 if size is None:
9128 raise errors.OpPrereqError("Required disk parameter size missing",
9129 errors.ECODE_INVAL)
9130 try:
9131 size = int(size)
9132 except (TypeError, ValueError), err:
9133 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9134 str(err), errors.ECODE_INVAL)
9135 disk_dict['size'] = size
9136 else:
9137 # modification of disk
9138 if 'size' in disk_dict:
9139 raise errors.OpPrereqError("Disk size change not possible, use"
9140 " grow-disk", errors.ECODE_INVAL)
9142 if disk_addremove > 1:
9143 raise errors.OpPrereqError("Only one disk add or remove operation"
9144 " supported at a time", errors.ECODE_INVAL)
9146 if self.op.disks and self.op.disk_template is not None:
9147 raise errors.OpPrereqError("Disk template conversion and other disk"
9148 " changes not supported at the same time",
9151 if self.op.disk_template:
9152 _CheckDiskTemplate(self.op.disk_template)
9153 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9154 self.op.remote_node is None):
9155 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9156 " one requires specifying a secondary node",
9161 for nic_op, nic_dict in self.op.nics:
9162 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9163 if nic_op == constants.DDM_REMOVE:
9164 nic_addremove += 1
9165 continue
9166 elif nic_op == constants.DDM_ADD:
9167 nic_addremove += 1
9168 else:
9169 if not isinstance(nic_op, int):
9170 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9171 if not isinstance(nic_dict, dict):
9172 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9173 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9175 # nic_dict should be a dict
9176 nic_ip = nic_dict.get('ip', None)
9177 if nic_ip is not None:
9178 if nic_ip.lower() == constants.VALUE_NONE:
9179 nic_dict['ip'] = None
9180 else:
9181 if not netutils.IPAddress.IsValid(nic_ip):
9182 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9183 errors.ECODE_INVAL)
9185 nic_bridge = nic_dict.get('bridge', None)
9186 nic_link = nic_dict.get('link', None)
9187 if nic_bridge and nic_link:
9188 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9189 " at the same time", errors.ECODE_INVAL)
9190 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9191 nic_dict['bridge'] = None
9192 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9193 nic_dict['link'] = None
9195 if nic_op == constants.DDM_ADD:
9196 nic_mac = nic_dict.get('mac', None)
9197 if nic_mac is None:
9198 nic_dict['mac'] = constants.VALUE_AUTO
9200 if 'mac' in nic_dict:
9201 nic_mac = nic_dict['mac']
9202 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9203 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9205 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9206 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9207 " modifying an existing nic",
9210 if nic_addremove > 1:
9211 raise errors.OpPrereqError("Only one NIC add or remove operation"
9212 " supported at a time", errors.ECODE_INVAL)
9214 def ExpandNames(self):
9215 self._ExpandAndLockInstance()
9216 self.needed_locks[locking.LEVEL_NODE] = []
9217 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9219 def DeclareLocks(self, level):
9220 if level == locking.LEVEL_NODE:
9221 self._LockInstancesNodes()
9222 if self.op.disk_template and self.op.remote_node:
9223 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9224 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9226 def BuildHooksEnv(self):
9227 """Build hooks env.
9229 This runs on the master, primary and secondaries.
9231 """
9232 args = dict()
9233 if constants.BE_MEMORY in self.be_new:
9234 args['memory'] = self.be_new[constants.BE_MEMORY]
9235 if constants.BE_VCPUS in self.be_new:
9236 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9237 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9238 # information at all.
9239 if self.op.nics:
9240 args['nics'] = []
9241 nic_override = dict(self.op.nics)
9242 for idx, nic in enumerate(self.instance.nics):
9243 if idx in nic_override:
9244 this_nic_override = nic_override[idx]
9246 this_nic_override = {}
9247 if 'ip' in this_nic_override:
9248 ip = this_nic_override['ip']
9249 else:
9250 ip = nic.ip
9251 if 'mac' in this_nic_override:
9252 mac = this_nic_override['mac']
9253 else:
9254 mac = nic.mac
9255 if idx in self.nic_pnew:
9256 nicparams = self.nic_pnew[idx]
9257 else:
9258 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9259 mode = nicparams[constants.NIC_MODE]
9260 link = nicparams[constants.NIC_LINK]
9261 args['nics'].append((ip, mac, mode, link))
9262 if constants.DDM_ADD in nic_override:
9263 ip = nic_override[constants.DDM_ADD].get('ip', None)
9264 mac = nic_override[constants.DDM_ADD]['mac']
9265 nicparams = self.nic_pnew[constants.DDM_ADD]
9266 mode = nicparams[constants.NIC_MODE]
9267 link = nicparams[constants.NIC_LINK]
9268 args['nics'].append((ip, mac, mode, link))
9269 elif constants.DDM_REMOVE in nic_override:
9270 del args['nics'][-1]
9272 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9273 if self.op.disk_template:
9274 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9275 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9276 return env, nl, nl
9278 def CheckPrereq(self):
9279 """Check prerequisites.
9281 This only checks the instance list against the existing names.
9283 """
9284 # checking the new params on the primary/secondary nodes
9286 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9287 cluster = self.cluster = self.cfg.GetClusterInfo()
9288 assert self.instance is not None, \
9289 "Cannot retrieve locked instance %s" % self.op.instance_name
9290 pnode = instance.primary_node
9291 nodelist = list(instance.all_nodes)
9294 if self.op.os_name and not self.op.force:
9295 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9296 self.op.force_variant)
9297 instance_os = self.op.os_name
9298 else:
9299 instance_os = instance.os
9301 if self.op.disk_template:
9302 if instance.disk_template == self.op.disk_template:
9303 raise errors.OpPrereqError("Instance already has disk template %s" %
9304 instance.disk_template, errors.ECODE_INVAL)
9306 if (instance.disk_template,
9307 self.op.disk_template) not in self._DISK_CONVERSIONS:
9308 raise errors.OpPrereqError("Unsupported disk template conversion from"
9309 " %s to %s" % (instance.disk_template,
9310 self.op.disk_template),
9311 errors.ECODE_INVAL)
9312 _CheckInstanceDown(self, instance, "cannot change disk template")
9313 if self.op.disk_template in constants.DTS_NET_MIRROR:
9314 if self.op.remote_node == pnode:
9315 raise errors.OpPrereqError("Given new secondary node %s is the same"
9316 " as the primary node of the instance" %
9317 self.op.remote_node, errors.ECODE_STATE)
9318 _CheckNodeOnline(self, self.op.remote_node)
9319 _CheckNodeNotDrained(self, self.op.remote_node)
9320 # FIXME: here we assume that the old instance type is DT_PLAIN
9321 assert instance.disk_template == constants.DT_PLAIN
9322 disks = [{"size": d.size, "vg": d.logical_id[0]}
9323 for d in instance.disks]
9324 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9325 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9327 # hvparams processing
9328 if self.op.hvparams:
9329 hv_type = instance.hypervisor
9330 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9331 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9332 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9335 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9336 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9337 self.hv_new = hv_new # the new actual values
9338 self.hv_inst = i_hvdict # the new dict (without defaults)
9339 else:
9340 self.hv_new = self.hv_inst = {}
9342 # beparams processing
9343 if self.op.beparams:
9344 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9345 use_none=True)
9346 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9347 be_new = cluster.SimpleFillBE(i_bedict)
9348 self.be_new = be_new # the new actual values
9349 self.be_inst = i_bedict # the new dict (without defaults)
9350 else:
9351 self.be_new = self.be_inst = {}
9353 # osparams processing
9354 if self.op.osparams:
9355 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9356 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9357 self.os_inst = i_osdict # the new dict (without defaults)
9358 else:
9359 self.os_inst = {}
9361 self.warn = []
9363 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9364 mem_check_list = [pnode]
9365 if be_new[constants.BE_AUTO_BALANCE]:
9366 # either we changed auto_balance to yes or it was from before
9367 mem_check_list.extend(instance.secondary_nodes)
9368 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9369 instance.hypervisor)
9370 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9371 instance.hypervisor)
9372 pninfo = nodeinfo[pnode]
9373 msg = pninfo.fail_msg
9374 if msg:
9375 # Assume the primary node is unreachable and go ahead
9376 self.warn.append("Can't get info from primary node %s: %s" %
9377 (pnode, msg))
9378 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9379 self.warn.append("Node data from primary node %s doesn't contain"
9380 " free memory information" % pnode)
9381 elif instance_info.fail_msg:
9382 self.warn.append("Can't get instance runtime information: %s" %
9383 instance_info.fail_msg)
9384 else:
9385 if instance_info.payload:
9386 current_mem = int(instance_info.payload['memory'])
9387 else:
9388 # Assume instance not running
9389 # (there is a slight race condition here, but it's not very probable,
9390 # and we have no other way to check)
9391 current_mem = 0
9392 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9393 pninfo.payload['memory_free'])
9394 if miss_mem > 0:
9395 raise errors.OpPrereqError("This change will prevent the instance"
9396 " from starting, due to %d MB of memory"
9397 " missing on its primary node" % miss_mem,
9400 if be_new[constants.BE_AUTO_BALANCE]:
9401 for node, nres in nodeinfo.items():
9402 if node not in instance.secondary_nodes:
9403 continue
9404 msg = nres.fail_msg
9405 if msg:
9406 self.warn.append("Can't get info from secondary node %s: %s" %
9407 (node, msg))
9408 elif not isinstance(nres.payload.get('memory_free', None), int):
9409 self.warn.append("Secondary node %s didn't return free"
9410 " memory information" % node)
9411 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9412 self.warn.append("Not enough memory to failover instance to"
9413 " secondary node %s" % node)
9415 # NIC processing
9416 self.nic_pnew = {}
9417 self.nic_pinst = {}
9418 for nic_op, nic_dict in self.op.nics:
9419 if nic_op == constants.DDM_REMOVE:
9420 if not instance.nics:
9421 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9424 if nic_op != constants.DDM_ADD:
9426 if not instance.nics:
9427 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9428 " no NICs" % nic_op,
9430 if nic_op < 0 or nic_op >= len(instance.nics):
9431 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9433 (nic_op, len(instance.nics) - 1),
9435 old_nic_params = instance.nics[nic_op].nicparams
9436 old_nic_ip = instance.nics[nic_op].ip
9441 update_params_dict = dict([(key, nic_dict[key])
9442 for key in constants.NICS_PARAMETERS
9443 if key in nic_dict])
9445 if 'bridge' in nic_dict:
9446 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9448 new_nic_params = _GetUpdatedParams(old_nic_params,
9449 update_params_dict)
9450 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9451 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9452 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9453 self.nic_pinst[nic_op] = new_nic_params
9454 self.nic_pnew[nic_op] = new_filled_nic_params
9455 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9457 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9458 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9459 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9460 if msg:
9461 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9462 if self.op.force:
9463 self.warn.append(msg)
9464 else:
9465 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9466 if new_nic_mode == constants.NIC_MODE_ROUTED:
9467 if 'ip' in nic_dict:
9468 nic_ip = nic_dict['ip']
9469 else:
9470 nic_ip = old_nic_ip
9471 if nic_ip is None:
9472 raise errors.OpPrereqError('Cannot set the nic ip to None'
9473 ' on a routed nic', errors.ECODE_INVAL)
9474 if 'mac' in nic_dict:
9475 nic_mac = nic_dict['mac']
9476 if nic_mac is None:
9477 raise errors.OpPrereqError('Cannot set the nic mac to None',
9478 errors.ECODE_INVAL)
9479 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9480 # otherwise generate the mac
9481 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9482 else:
9483 # or validate/reserve the current one
9484 try:
9485 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9486 except errors.ReservationError:
9487 raise errors.OpPrereqError("MAC address %s already in use"
9488 " in cluster" % nic_mac,
9489 errors.ECODE_NOTUNIQUE)
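# For reference, the (nic_op, nic_dict) pairs handled above come in three
# shapes (a sketch; the exact payload is assembled by the client tools):
# (constants.DDM_ADD, {'mac': 'auto', 'ip': None}) - append a new NIC
# (constants.DDM_REMOVE, {}) - drop the last NIC
# (0, {'mac': '00:11:22:33:44:55'}) - modify NIC index 0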
9492 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9493 raise errors.OpPrereqError("Disk operations not supported for"
9494 " diskless instances",
9496 for disk_op, _ in self.op.disks:
9497 if disk_op == constants.DDM_REMOVE:
9498 if len(instance.disks) == 1:
9499 raise errors.OpPrereqError("Cannot remove the last disk of"
9500 " an instance", errors.ECODE_INVAL)
9501 _CheckInstanceDown(self, instance, "cannot remove disks")
9503 if (disk_op == constants.DDM_ADD and
9504 len(instance.disks) >= constants.MAX_DISKS):
9505 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9506 " add more" % constants.MAX_DISKS,
9508 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9510 if disk_op < 0 or disk_op >= len(instance.disks):
9511 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9513 (disk_op, len(instance.disks)),
9518 def _ConvertPlainToDrbd(self, feedback_fn):
9519 """Converts an instance from plain to drbd.
9522 feedback_fn("Converting template to drbd")
9523 instance = self.instance
9524 pnode = instance.primary_node
9525 snode = self.op.remote_node
9527 # create a fake disk info for _GenerateDiskTemplate
9528 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9529 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9530 instance.name, pnode, [snode],
9531 disk_info, None, None, 0, feedback_fn)
9532 info = _GetInstanceInfoText(instance)
9533 feedback_fn("Creating aditional volumes...")
9534 # first, create the missing data and meta devices
9535 for disk in new_disks:
9536 # unfortunately this is... not too nice
9537 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9538 info, True)
9539 for child in disk.children:
9540 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9541 # at this stage, all new LVs have been created, we can rename the
9542 # old ones
9543 feedback_fn("Renaming original volumes...")
9544 rename_list = [(o, n.children[0].logical_id)
9545 for (o, n) in zip(instance.disks, new_disks)]
9546 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9547 result.Raise("Failed to rename original LVs")
9549 feedback_fn("Initializing DRBD devices...")
9550 # all child devices are in place, we can now create the DRBD devices
9551 for disk in new_disks:
9552 for node in [pnode, snode]:
9553 f_create = node == pnode
9554 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9556 # at this point, the instance has been modified
9557 instance.disk_template = constants.DT_DRBD8
9558 instance.disks = new_disks
9559 self.cfg.Update(instance, feedback_fn)
9561 # disks are created, waiting for sync
9562 disk_abort = not _WaitForSync(self, instance)
9564 raise errors.OpExecError("There are some degraded disks for"
9565 " this instance, please cleanup manually")
9567 def _ConvertDrbdToPlain(self, feedback_fn):
9568 """Converts an instance from drbd to plain.
9571 instance = self.instance
9572 assert len(instance.secondary_nodes) == 1
9573 pnode = instance.primary_node
9574 snode = instance.secondary_nodes[0]
9575 feedback_fn("Converting template to plain")
9577 old_disks = instance.disks
9578 new_disks = [d.children[0] for d in old_disks]
9580 # copy over size and mode
9581 for parent, child in zip(old_disks, new_disks):
9582 child.size = parent.size
9583 child.mode = parent.mode
9585 # update instance structure
9586 instance.disks = new_disks
9587 instance.disk_template = constants.DT_PLAIN
9588 self.cfg.Update(instance, feedback_fn)
9590 feedback_fn("Removing volumes on the secondary node...")
9591 for disk in old_disks:
9592 self.cfg.SetDiskID(disk, snode)
9593 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9595 self.LogWarning("Could not remove block device %s on node %s,"
9596 " continuing anyway: %s", disk.iv_name, snode, msg)
9598 feedback_fn("Removing unneeded volumes on the primary node...")
9599 for idx, disk in enumerate(old_disks):
9600 meta = disk.children[1]
9601 self.cfg.SetDiskID(meta, pnode)
9602 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9604 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9605 " continuing anyway: %s", idx, pnode, msg)
9607 def Exec(self, feedback_fn):
9608 """Modifies an instance.
9610 All parameters take effect only at the next restart of the instance.
9612 """
9613 # Process here the warnings from CheckPrereq, as we don't have a
9614 # feedback_fn there.
9615 for warn in self.warn:
9616 feedback_fn("WARNING: %s" % warn)
9619 instance = self.instance
9621 for disk_op, disk_dict in self.op.disks:
9622 if disk_op == constants.DDM_REMOVE:
9623 # remove the last disk
9624 device = instance.disks.pop()
9625 device_idx = len(instance.disks)
9626 for node, disk in device.ComputeNodeTree(instance.primary_node):
9627 self.cfg.SetDiskID(disk, node)
9628 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9630 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9631 " continuing anyway", device_idx, node, msg)
9632 result.append(("disk/%d" % device_idx, "remove"))
9633 elif disk_op == constants.DDM_ADD:
9635 if instance.disk_template == constants.DT_FILE:
9636 file_driver, file_path = instance.disks[0].logical_id
9637 file_path = os.path.dirname(file_path)
9638 else:
9639 file_driver = file_path = None
9640 disk_idx_base = len(instance.disks)
9641 new_disk = _GenerateDiskTemplate(self,
9642 instance.disk_template,
9643 instance.name, instance.primary_node,
9644 instance.secondary_nodes,
9645 [disk_dict],
9646 file_path,
9647 file_driver,
9648 disk_idx_base, feedback_fn)[0]
9649 instance.disks.append(new_disk)
9650 info = _GetInstanceInfoText(instance)
9652 logging.info("Creating volume %s for instance %s",
9653 new_disk.iv_name, instance.name)
9654 # Note: this needs to be kept in sync with _CreateDisks
9656 for node in instance.all_nodes:
9657 f_create = node == instance.primary_node
9658 try:
9659 _CreateBlockDev(self, node, instance, new_disk,
9660 f_create, info, f_create)
9661 except errors.OpExecError, err:
9662 self.LogWarning("Failed to create volume %s (%s) on"
9663 " node %s: %s",
9664 new_disk.iv_name, new_disk, node, err)
9665 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9666 (new_disk.size, new_disk.mode)))
9667 else:
9668 # change a given disk
9669 instance.disks[disk_op].mode = disk_dict['mode']
9670 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9672 if self.op.disk_template:
9673 r_shut = _ShutdownInstanceDisks(self, instance)
9675 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9676 " proceed with disk template conversion")
9677 mode = (instance.disk_template, self.op.disk_template)
9678 try:
9679 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9680 except:
9681 self.cfg.ReleaseDRBDMinors(instance.name)
9682 raise
9683 result.append(("disk_template", self.op.disk_template))
9686 for nic_op, nic_dict in self.op.nics:
9687 if nic_op == constants.DDM_REMOVE:
9688 # remove the last nic
9689 del instance.nics[-1]
9690 result.append(("nic.%d" % len(instance.nics), "remove"))
9691 elif nic_op == constants.DDM_ADD:
9692 # mac and bridge should be set, by now
9693 mac = nic_dict['mac']
9694 ip = nic_dict.get('ip', None)
9695 nicparams = self.nic_pinst[constants.DDM_ADD]
9696 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9697 instance.nics.append(new_nic)
9698 result.append(("nic.%d" % (len(instance.nics) - 1),
9699 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9700 (new_nic.mac, new_nic.ip,
9701 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9702 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9703 )))
9704 else:
9705 for key in 'mac', 'ip':
9706 if key in nic_dict:
9707 setattr(instance.nics[nic_op], key, nic_dict[key])
9708 if nic_op in self.nic_pinst:
9709 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9710 for key, val in nic_dict.iteritems():
9711 result.append(("nic.%s/%d" % (key, nic_op), val))
9714 if self.op.hvparams:
9715 instance.hvparams = self.hv_inst
9716 for key, val in self.op.hvparams.iteritems():
9717 result.append(("hv/%s" % key, val))
9720 if self.op.beparams:
9721 instance.beparams = self.be_inst
9722 for key, val in self.op.beparams.iteritems():
9723 result.append(("be/%s" % key, val))
9725 # OS change
9726 if self.op.os_name:
9727 instance.os = self.op.os_name
9730 if self.op.osparams:
9731 instance.osparams = self.os_inst
9732 for key, val in self.op.osparams.iteritems():
9733 result.append(("os/%s" % key, val))
9735 self.cfg.Update(instance, feedback_fn)
9737 return result
9739 _DISK_CONVERSIONS = {
9740 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9741 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9742 }
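# The dispatch table above is keyed on (old_template, new_template) and the
# handlers are plain functions, so Exec calls them explicitly with self,
# e.g. (sketch): self._DISK_CONVERSIONS[(constants.DT_PLAIN,
# constants.DT_DRBD8)](self, feedback_fn)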
9745 class LUQueryExports(NoHooksLU):
9746 """Query the exports list
9750 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9751 ("use_locking", False, ht.TBool),
9755 def ExpandNames(self):
9756 self.needed_locks = {}
9757 self.share_locks[locking.LEVEL_NODE] = 1
9758 if not self.op.nodes:
9759 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9760 else:
9761 self.needed_locks[locking.LEVEL_NODE] = \
9762 _GetWantedNodes(self, self.op.nodes)
9764 def Exec(self, feedback_fn):
9765 """Compute the list of all the exported system images.
9767 @rtype: dict
9768 @return: a dictionary with the structure node->(export-list)
9769 where export-list is a list of the instances exported on
9770 that node.
9772 """
9773 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9774 rpcresult = self.rpc.call_export_list(self.nodes)
9775 result = {}
9776 for node in rpcresult:
9777 if rpcresult[node].fail_msg:
9778 result[node] = False
9779 else:
9780 result[node] = rpcresult[node].payload
9782 return result
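# Example of the structure Exec returns (hypothetical names); nodes whose
# RPC failed map to False instead of an export list:
# {"node1.example.com": ["inst1.example.com"],
# "node2.example.com": False}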
9785 class LUPrepareExport(NoHooksLU):
9786 """Prepares an instance for an export and returns useful information.
9791 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9795 def ExpandNames(self):
9796 self._ExpandAndLockInstance()
9798 def CheckPrereq(self):
9799 """Check prerequisites.
9802 instance_name = self.op.instance_name
9804 self.instance = self.cfg.GetInstanceInfo(instance_name)
9805 assert self.instance is not None, \
9806 "Cannot retrieve locked instance %s" % self.op.instance_name
9807 _CheckNodeOnline(self, self.instance.primary_node)
9809 self._cds = _GetClusterDomainSecret()
9811 def Exec(self, feedback_fn):
9812 """Prepares an instance for an export.
9815 instance = self.instance
9817 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9818 salt = utils.GenerateSecret(8)
9820 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9821 result = self.rpc.call_x509_cert_create(instance.primary_node,
9822 constants.RIE_CERT_VALIDITY)
9823 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9825 (name, cert_pem) = result.payload
9827 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9828 cert_pem)
9830 return {
9831 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9832 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9834 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9840 class LUExportInstance(LogicalUnit):
9841 """Export an instance to an image in the cluster.
9844 HPATH = "instance-export"
9845 HTYPE = constants.HTYPE_INSTANCE
9848 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9849 ("shutdown", True, ht.TBool),
9851 ("remove_instance", False, ht.TBool),
9852 ("ignore_remove_failures", False, ht.TBool),
9853 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9854 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9855 ("destination_x509_ca", None, ht.TMaybeString),
9859 def CheckArguments(self):
9860 """Check the arguments.
9863 self.x509_key_name = self.op.x509_key_name
9864 self.dest_x509_ca_pem = self.op.destination_x509_ca
9866 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9867 if not self.x509_key_name:
9868 raise errors.OpPrereqError("Missing X509 key name for encryption",
9871 if not self.dest_x509_ca_pem:
9872 raise errors.OpPrereqError("Missing destination X509 CA",
9875 def ExpandNames(self):
9876 self._ExpandAndLockInstance()
9878 # Lock all nodes for local exports
9879 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9880 # FIXME: lock only instance primary and destination node
9882 # Sad but true, for now we have do lock all nodes, as we don't know where
9883 # the previous export might be, and in this LU we search for it and
9884 # remove it from its current node. In the future we could fix this by:
9885 # - making a tasklet to search (share-lock all), then create the
9886 # new one, then one to remove, after
9887 # - removing the removal operation altogether
9888 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9890 def DeclareLocks(self, level):
9891 """Last minute lock declaration."""
9892 # All nodes are locked anyway, so nothing to do here.
9894 def BuildHooksEnv(self):
9895 """Build hooks env.
9897 This will run on the master, primary node and target node.
9899 """
9900 env = {
9901 "EXPORT_MODE": self.op.mode,
9902 "EXPORT_NODE": self.op.target_node,
9903 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9904 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9905 # TODO: Generic function for boolean env variables
9906 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9909 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9911 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9913 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9914 nl.append(self.op.target_node)
9916 return env, nl, nl
9918 def CheckPrereq(self):
9919 """Check prerequisites.
9921 This checks that the instance and node names are valid.
9923 """
9924 instance_name = self.op.instance_name
9926 self.instance = self.cfg.GetInstanceInfo(instance_name)
9927 assert self.instance is not None, \
9928 "Cannot retrieve locked instance %s" % self.op.instance_name
9929 _CheckNodeOnline(self, self.instance.primary_node)
9931 if (self.op.remove_instance and self.instance.admin_up and
9932 not self.op.shutdown):
9933 raise errors.OpPrereqError("Can not remove instance without shutting it"
9936 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9937 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9938 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9939 assert self.dst_node is not None
9941 _CheckNodeOnline(self, self.dst_node.name)
9942 _CheckNodeNotDrained(self, self.dst_node.name)
9944 self._cds = None
9945 self.dest_disk_info = None
9946 self.dest_x509_ca = None
9948 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9949 self.dst_node = None
9951 if len(self.op.target_node) != len(self.instance.disks):
9952 raise errors.OpPrereqError(("Received destination information for %s"
9953 " disks, but instance %s has %s disks") %
9954 (len(self.op.target_node), instance_name,
9955 len(self.instance.disks)),
9956 errors.ECODE_INVAL)
9958 cds = _GetClusterDomainSecret()
9960 # Check X509 key name
9961 try:
9962 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9963 except (TypeError, ValueError), err:
9964 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9966 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9967 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9970 # Load and verify CA
9971 try:
9972 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9973 except OpenSSL.crypto.Error, err:
9974 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9975 (err, ), errors.ECODE_INVAL)
9977 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9978 if errcode is not None:
9979 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9980 (msg, ), errors.ECODE_INVAL)
9982 self.dest_x509_ca = cert
9984 # Verify target information
9985 disk_info = []
9986 for idx, disk_data in enumerate(self.op.target_node):
9987 try:
9988 (host, port, magic) = \
9989 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9990 except errors.GenericError, err:
9991 raise errors.OpPrereqError("Target info for disk %s: %s" %
9992 (idx, err), errors.ECODE_INVAL)
9994 disk_info.append((host, port, magic))
9996 assert len(disk_info) == len(self.op.target_node)
9997 self.dest_disk_info = disk_info
10000 raise errors.ProgrammerError("Unhandled export mode %r" %
10003 # instance disk type verification
10004 # TODO: Implement export support for file-based disks
10005 for disk in self.instance.disks:
10006 if disk.dev_type == constants.LD_FILE:
10007 raise errors.OpPrereqError("Export not supported for instances with"
10008 " file-based disks", errors.ECODE_INVAL)
10010 def _CleanupExports(self, feedback_fn):
10011 """Removes exports of current instance from all other nodes.
10013 If an instance in a cluster with nodes A..D was exported to node C, its
10014 exports will be removed from the nodes A, B and D.
10016 """
10017 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10019 nodelist = self.cfg.GetNodeList()
10020 nodelist.remove(self.dst_node.name)
10022 # on one-node clusters nodelist will be empty after the removal
10023 # if we proceed the backup would be removed because OpQueryExports
10024 # substitutes an empty list with the full cluster node list.
10025 iname = self.instance.name
10027 feedback_fn("Removing old exports for instance %s" % iname)
10028 exportlist = self.rpc.call_export_list(nodelist)
10029 for node in exportlist:
10030 if exportlist[node].fail_msg:
10031 continue
10032 if iname in exportlist[node].payload:
10033 msg = self.rpc.call_export_remove(node, iname).fail_msg
10035 self.LogWarning("Could not remove older export for instance %s"
10036 " on node %s: %s", iname, node, msg)
10038 def Exec(self, feedback_fn):
10039 """Export an instance to an image in the cluster.
10042 assert self.op.mode in constants.EXPORT_MODES
10044 instance = self.instance
10045 src_node = instance.primary_node
10047 if self.op.shutdown:
10048 # shutdown the instance, but not the disks
10049 feedback_fn("Shutting down instance %s" % instance.name)
10050 result = self.rpc.call_instance_shutdown(src_node, instance,
10051 self.op.shutdown_timeout)
10052 # TODO: Maybe ignore failures if ignore_remove_failures is set
10053 result.Raise("Could not shutdown instance %s on"
10054 " node %s" % (instance.name, src_node))
10056 # set the disks ID correctly since call_instance_start needs the
10057 # correct drbd minor to create the symlinks
10058 for disk in instance.disks:
10059 self.cfg.SetDiskID(disk, src_node)
10061 activate_disks = (not instance.admin_up)
10063 if activate_disks:
10064 # Activate the instance disks if we're exporting a stopped instance
10065 feedback_fn("Activating disks for %s" % instance.name)
10066 _StartInstanceDisks(self, instance, None)
10068 try:
10069 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10070 instance)
10072 helper.CreateSnapshots()
10073 try:
10074 if (self.op.shutdown and instance.admin_up and
10075 not self.op.remove_instance):
10076 assert not activate_disks
10077 feedback_fn("Starting instance %s" % instance.name)
10078 result = self.rpc.call_instance_start(src_node, instance, None, None)
10079 msg = result.fail_msg
10081 feedback_fn("Failed to start instance: %s" % msg)
10082 _ShutdownInstanceDisks(self, instance)
10083 raise errors.OpExecError("Could not start instance: %s" % msg)
10085 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10086 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10087 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10088 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10089 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10091 (key_name, _, _) = self.x509_key_name
10093 dest_ca_pem = \
10094 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10095 self.dest_x509_ca)
10097 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10098 key_name, dest_ca_pem,
10099 timeouts)
10100 finally:
10101 helper.Cleanup()
10103 # Check for backwards compatibility
10104 assert len(dresults) == len(instance.disks)
10105 assert compat.all(isinstance(i, bool) for i in dresults), \
10106 "Not all results are boolean: %r" % dresults
10110 feedback_fn("Deactivating disks for %s" % instance.name)
10111 _ShutdownInstanceDisks(self, instance)
10113 if not (compat.all(dresults) and fin_resu):
10114 failures = []
10115 if not fin_resu:
10116 failures.append("export finalization")
10117 if not compat.all(dresults):
10118 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10119 if not dsk)
10120 failures.append("disk export: disk(s) %s" % fdsk)
10122 raise errors.OpExecError("Export failed, errors in %s" %
10123 utils.CommaJoin(failures))
10125 # At this point, the export was successful, we can cleanup/finish
10127 # Remove instance if requested
10128 if self.op.remove_instance:
10129 feedback_fn("Removing instance %s" % instance.name)
10130 _RemoveInstance(self, feedback_fn, instance,
10131 self.op.ignore_remove_failures)
10133 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10134 self._CleanupExports(feedback_fn)
10136 return fin_resu, dresults
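# A sketch of interpreting the return value: a fully successful two-disk
# export yields (True, [True, True]); any False entry in dresults marks a
# disk whose snapshot transfer failed, and fin_resu reports finalization.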
10139 class LURemoveExport(NoHooksLU):
10140 """Remove exports related to the named instance.
10148 def ExpandNames(self):
10149 self.needed_locks = {}
10150 # We need all nodes to be locked in order for RemoveExport to work, but we
10151 # don't need to lock the instance itself, as nothing will happen to it (and
10152 # we can remove exports also for a removed instance)
10153 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10155 def Exec(self, feedback_fn):
10156 """Remove any export.
10159 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10160 # If the instance was not found we'll try with the name that was passed in.
10161 # This will only work if it was an FQDN, though.
10163 if not instance_name:
10165 instance_name = self.op.instance_name
10167 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10168 exportlist = self.rpc.call_export_list(locked_nodes)
10169 found = False
10170 for node in exportlist:
10171 msg = exportlist[node].fail_msg
10172 if msg:
10173 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10174 continue
10175 if instance_name in exportlist[node].payload:
10176 found = True
10177 result = self.rpc.call_export_remove(node, instance_name)
10178 msg = result.fail_msg
10179 if msg:
10180 logging.error("Could not remove export for instance %s"
10181 " on node %s: %s", instance_name, node, msg)
10183 if fqdn_warn and not found:
10184 feedback_fn("Export not found. If trying to remove an export belonging"
10185 " to a deleted instance please use its Fully Qualified"
10189 class LUAddGroup(LogicalUnit):
10190 """Logical unit for creating node groups.
10193 HPATH = "group-add"
10194 HTYPE = constants.HTYPE_GROUP
10198 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10199 ("alloc_policy", None, ht.TOr(ht.TNone,
10200 ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10205 def ExpandNames(self):
10206 # We need the new group's UUID here so that we can create and acquire the
10207 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10208 # that it should not check whether the UUID exists in the configuration.
10209 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10210 self.needed_locks = {}
10211 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10213 def CheckPrereq(self):
10214 """Check prerequisites.
10216 This checks that the given group name is not an existing node group
10217 already.
10219 """
10220 try:
10221 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10222 except errors.OpPrereqError:
10223 pass
10224 else:
10225 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10226 " node group (UUID: %s)" %
10227 (self.op.group_name, existing_uuid),
10228 errors.ECODE_EXISTS)
10230 if self.op.ndparams:
10231 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10233 def BuildHooksEnv(self):
10234 """Build hooks env.
10238 "GROUP_NAME": self.op.group_name,
10240 mn = self.cfg.GetMasterNode()
10241 return env, [mn], [mn]
10243 def Exec(self, feedback_fn):
10244 """Add the node group to the cluster.
10247 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10248 uuid=self.group_uuid,
10249 alloc_policy=self.op.alloc_policy,
10250 ndparams=self.op.ndparams)
10252 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10253 del self.remove_locks[locking.LEVEL_NODEGROUP]
10256 class LUQueryGroups(NoHooksLU):
10257 """Logical unit for querying node groups.
10260 # pylint: disable-msg=W0142
10263 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10268 _FIELDS_DYNAMIC = utils.FieldSet()
10270 _SIMPLE_FIELDS = ["name", "uuid", "alloc_policy",
10271 "ctime", "mtime", "serial_no"]
10273 _FIELDS_STATIC = utils.FieldSet(
10274 "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10276 def CheckArguments(self):
10277 _CheckOutputFields(static=self._FIELDS_STATIC,
10278 dynamic=self._FIELDS_DYNAMIC,
10279 selected=self.op.output_fields)
10281 def ExpandNames(self):
10282 self.needed_locks = {}
10284 def Exec(self, feedback_fn):
10285 """Computes the list of groups and their attributes.
10288 all_groups = self.cfg.GetAllNodeGroupsInfo()
10289 name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10291 if not self.op.names:
10292 sorted_names = utils.NiceSort(name_to_uuid.keys())
10293 my_groups = [name_to_uuid[n] for n in sorted_names]
10294 else:
10295 # Accept names to be either names or UUIDs.
10296 all_uuid = frozenset(all_groups.keys())
10297 my_groups = []
10298 missing = []
10300 for name in self.op.names:
10301 if name in all_uuid:
10302 my_groups.append(name)
10303 elif name in name_to_uuid:
10304 my_groups.append(name_to_uuid[name])
10305 else:
10306 missing.append(name)
10308 if missing:
10309 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10310 errors.ECODE_NOENT)
10312 do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10313 intersection(self.op.output_fields))
10315 do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10316 intersection(self.op.output_fields))
10318 # We need to map group->[nodes], and group->[instances]. The former is
10319 # directly attainable, but the latter we have to do through instance->node,
10320 # hence we need to process nodes even if we only need instance information.
10321 if do_nodes or do_instances:
10322 all_nodes = self.cfg.GetAllNodesInfo()
10323 group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10324 node_to_group = {}
10326 for node in all_nodes.values():
10327 if node.group in group_to_nodes:
10328 group_to_nodes[node.group].append(node.name)
10329 node_to_group[node.name] = node.group
10331 if do_instances:
10332 all_instances = self.cfg.GetAllInstancesInfo()
10333 group_to_instances = dict((all_groups[name].uuid, [])
10334 for name in my_groups)
10335 for instance in all_instances.values():
10336 node = instance.primary_node
10337 if node in node_to_group:
10338 group_to_instances[node_to_group[node]].append(instance.name)
10340 output = []
10342 for uuid in my_groups:
10343 group = all_groups[uuid]
10344 group_output = []
10346 for field in self.op.output_fields:
10347 if field in self._SIMPLE_FIELDS:
10348 val = getattr(group, field)
10349 elif field == "node_list":
10350 val = utils.NiceSort(group_to_nodes[group.uuid])
10351 elif field == "node_cnt":
10352 val = len(group_to_nodes[group.uuid])
10353 elif field == "pinst_list":
10354 val = utils.NiceSort(group_to_instances[group.uuid])
10355 elif field == "pinst_cnt":
10356 val = len(group_to_instances[group.uuid])
10357 else:
10358 raise errors.ParameterError(field)
10359 group_output.append(val)
10360 output.append(group_output)
10362 return output
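# Example (hypothetical cluster): with output_fields ["name", "node_cnt"]
# the rows returned above could look like [["default", 3], ["rack1", 2]],
# one inner list per requested group, in the computed order.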
10365 class LUSetGroupParams(LogicalUnit):
10366 """Modifies the parameters of a node group.
10369 HPATH = "group-modify"
10370 HTYPE = constants.HTYPE_GROUP
10374 ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10375 ("alloc_policy", None, ht.TOr(ht.TNone,
10376 ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10381 def CheckArguments(self):
10382 all_changes = [
10383 self.op.ndparams,
10384 self.op.alloc_policy,
10385 ]
10387 if all_changes.count(None) == len(all_changes):
10388 raise errors.OpPrereqError("Please pass at least one modification",
10389 errors.ECODE_INVAL)
10391 def ExpandNames(self):
10392 # This raises errors.OpPrereqError on its own:
10393 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10395 self.needed_locks = {
10396 locking.LEVEL_NODEGROUP: [self.group_uuid],
10399 def CheckPrereq(self):
10400 """Check prerequisites.
10403 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10405 if self.group is None:
10406 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10407 (self.op.group_name, self.group_uuid))
10409 if self.op.ndparams:
10410 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10411 self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)
10413 def BuildHooksEnv(self):
10414 """Build hooks env.
10418 "GROUP_NAME": self.op.group_name,
10419 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10421 mn = self.cfg.GetMasterNode()
10422 return env, [mn], [mn]
10424 def Exec(self, feedback_fn):
10425 """Modifies the node group.
10430 if self.op.ndparams:
10431 self.group.ndparams = self.new_ndparams
10432 result.append(("ndparams", str(self.group.ndparams)))
10434 if self.op.alloc_policy:
10435 self.group.alloc_policy = self.op.alloc_policy
10437 self.cfg.Update(self.group, feedback_fn)
10439 return result
10442 class LURemoveGroup(LogicalUnit):
10443 HPATH = "group-remove"
10444 HTYPE = constants.HTYPE_GROUP
10446 _OP_PARAMS = [
10447 _PGroupName,
10448 ]
10449 REQ_BGL = False
10452 def ExpandNames(self):
10453 # This raises errors.OpPrereqError on its own:
10454 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10455 self.needed_locks = {
10456 locking.LEVEL_NODEGROUP: [self.group_uuid],
10459 def CheckPrereq(self):
10460 """Check prerequisites.
10462 This checks that the given group name exists as a node group, that is
10463 empty (i.e., contains no nodes), and that is not the last group of the
10464 cluster.
10466 """
10467 # Verify that the group is empty.
10468 group_nodes = [node.name
10469 for node in self.cfg.GetAllNodesInfo().values()
10470 if node.group == self.group_uuid]
10473 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10475 (self.op.group_name,
10476 utils.CommaJoin(utils.NiceSort(group_nodes))),
10477 errors.ECODE_STATE)
10479 # Verify the cluster would not be left group-less.
10480 if len(self.cfg.GetNodeGroupList()) == 1:
10481 raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10482 " which cannot be left without at least one"
10483 " group" % self.op.group_name,
10484 errors.ECODE_STATE)
10486 def BuildHooksEnv(self):
10487 """Build hooks env.
10491 "GROUP_NAME": self.op.group_name,
10493 mn = self.cfg.GetMasterNode()
10494 return env, [mn], [mn]
10496 def Exec(self, feedback_fn):
10497 """Remove the node group.
10501 self.cfg.RemoveNodeGroup(self.group_uuid)
10502 except errors.ConfigurationError:
10503 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10504 (self.op.group_name, self.group_uuid))
10506 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10509 class LURenameGroup(LogicalUnit):
10510 HPATH = "group-rename"
10511 HTYPE = constants.HTYPE_GROUP
10513 _OP_PARAMS = [
10514 ("old_name", ht.NoDefault, ht.TNonEmptyString),
10515 ("new_name", ht.NoDefault, ht.TNonEmptyString),
10516 ]
10518 REQ_BGL = False
10520 def ExpandNames(self):
10521 # This raises errors.OpPrereqError on its own:
10522 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10524 self.needed_locks = {
10525 locking.LEVEL_NODEGROUP: [self.group_uuid],
10528 def CheckPrereq(self):
10529 """Check prerequisites.
10531 This checks that the given old_name exists as a node group, and that
10532 new_name is not already in use.
10534 """
10535 try:
10536 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10537 except errors.OpPrereqError:
10538 pass
10539 else:
10540 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10541 " node group (UUID: %s)" %
10542 (self.op.new_name, new_name_uuid),
10543 errors.ECODE_EXISTS)
10545 def BuildHooksEnv(self):
10546 """Build hooks env.
10550 "OLD_NAME": self.op.old_name,
10551 "NEW_NAME": self.op.new_name,
10554 mn = self.cfg.GetMasterNode()
10555 all_nodes = self.cfg.GetAllNodesInfo()
10556 run_nodes = [mn]
10557 all_nodes.pop(mn, None)
10559 for node in all_nodes.values():
10560 if node.group == self.group_uuid:
10561 run_nodes.append(node.name)
10563 return env, run_nodes, run_nodes
10565 def Exec(self, feedback_fn):
10566 """Rename the node group.
10569 group = self.cfg.GetNodeGroup(self.group_uuid)
10572 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10573 (self.op.old_name, self.group_uuid))
10575 group.name = self.op.new_name
10576 self.cfg.Update(group, feedback_fn)
10578 return self.op.new_name
10581 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10582 """Generic tags LU.
10584 This is an abstract class which is the parent of all the other tags LUs.
10586 """
10588 def ExpandNames(self):
10589 self.needed_locks = {}
10590 if self.op.kind == constants.TAG_NODE:
10591 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10592 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10593 elif self.op.kind == constants.TAG_INSTANCE:
10594 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10595 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10597 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10598 # not possible to acquire the BGL based on opcode parameters)
10600 def CheckPrereq(self):
10601 """Check prerequisites.
10604 if self.op.kind == constants.TAG_CLUSTER:
10605 self.target = self.cfg.GetClusterInfo()
10606 elif self.op.kind == constants.TAG_NODE:
10607 self.target = self.cfg.GetNodeInfo(self.op.name)
10608 elif self.op.kind == constants.TAG_INSTANCE:
10609 self.target = self.cfg.GetInstanceInfo(self.op.name)
10611 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10612 str(self.op.kind), errors.ECODE_INVAL)
10615 class LUGetTags(TagsLU):
10616 """Returns the tags of a given object.
10620 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10621 # Name is only meaningful for nodes and instances
10622 ("name", ht.NoDefault, ht.TMaybeString),
10626 def ExpandNames(self):
10627 TagsLU.ExpandNames(self)
10629 # Share locks as this is only a read operation
10630 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10632 def Exec(self, feedback_fn):
10633 """Returns the tag list.
10636 return list(self.target.GetTags())
10639 class LUSearchTags(NoHooksLU):
10640 """Searches the tags for a given pattern.
10644 ("pattern", ht.NoDefault, ht.TNonEmptyString),
10648 def ExpandNames(self):
10649 self.needed_locks = {}
10651 def CheckPrereq(self):
10652 """Check prerequisites.
10654 This checks the pattern passed for validity by compiling it.
10656 """
10657 try:
10658 self.re = re.compile(self.op.pattern)
10659 except re.error, err:
10660 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10661 (self.op.pattern, err), errors.ECODE_INVAL)
10663 def Exec(self, feedback_fn):
10664 """Returns the tag list.
10668 tgts = [("/cluster", cfg.GetClusterInfo())]
10669 ilist = cfg.GetAllInstancesInfo().values()
10670 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10671 nlist = cfg.GetAllNodesInfo().values()
10672 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10673 results = []
10674 for path, target in tgts:
10675 for tag in target.GetTags():
10676 if self.re.search(tag):
10677 results.append((path, tag))
10679 return results
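# Example (hypothetical tags): searching for the pattern "^db" could
# return [("/instances/inst1.example.com", "dbserver"),
# ("/nodes/node1.example.com", "dbnet")], i.e. (path, tag) tuples for
# every object whose tags match the regular expression.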
10681 class LUAddTags(TagsLU):
10682 """Sets a tag on a given object.
10686 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10687 # Name is only meaningful for nodes and instances
10688 ("name", ht.NoDefault, ht.TMaybeString),
10689 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10693 def CheckPrereq(self):
10694 """Check prerequisites.
10696 This checks the type and length of the tag name and value.
10698 """
10699 TagsLU.CheckPrereq(self)
10700 for tag in self.op.tags:
10701 objects.TaggableObject.ValidateTag(tag)
10703 def Exec(self, feedback_fn):
10704 """Sets the tag.
10706 """
10707 try:
10708 for tag in self.op.tags:
10709 self.target.AddTag(tag)
10710 except errors.TagError, err:
10711 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10712 self.cfg.Update(self.target, feedback_fn)
10715 class LUDelTags(TagsLU):
10716 """Delete a list of tags from a given object.
10720 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10721 # Name is only meaningful for nodes and instances
10722 ("name", ht.NoDefault, ht.TMaybeString),
10723 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10727 def CheckPrereq(self):
10728 """Check prerequisites.
10730 This checks that we have the given tag.
10732 """
10733 TagsLU.CheckPrereq(self)
10734 for tag in self.op.tags:
10735 objects.TaggableObject.ValidateTag(tag)
10736 del_tags = frozenset(self.op.tags)
10737 cur_tags = self.target.GetTags()
10739 diff_tags = del_tags - cur_tags
10740 if diff_tags:
10741 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10742 raise errors.OpPrereqError("Tag(s) %s not found" %
10743 (utils.CommaJoin(diff_names), ),
10744 errors.ECODE_NOENT)
10746 def Exec(self, feedback_fn):
10747 """Remove the tag from the object.
10750 for tag in self.op.tags:
10751 self.target.RemoveTag(tag)
10752 self.cfg.Update(self.target, feedback_fn)
10755 class LUTestDelay(NoHooksLU):
10756 """Sleep for a specified amount of time.
10758 This LU sleeps on the master and/or nodes for a specified amount of
10759 time.
10761 """
10762 _OP_PARAMS = [
10763 ("duration", ht.NoDefault, ht.TFloat),
10764 ("on_master", True, ht.TBool),
10765 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10766 ("repeat", 0, ht.TPositiveInt)
10770 def ExpandNames(self):
10771 """Expand names and set required locks.
10773 This expands the node list, if any.
10775 """
10776 self.needed_locks = {}
10777 if self.op.on_nodes:
10778 # _GetWantedNodes can be used here, but is not always appropriate to use
10779 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10780 # more information.
10781 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10782 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10784 def _TestDelay(self):
10785 """Do the actual sleep.
10788 if self.op.on_master:
10789 if not utils.TestDelay(self.op.duration):
10790 raise errors.OpExecError("Error during master delay test")
10791 if self.op.on_nodes:
10792 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10793 for node, node_result in result.items():
10794 node_result.Raise("Failure during rpc call to node %s" % node)
10796 def Exec(self, feedback_fn):
10797 """Execute the test delay opcode, with the wanted repetitions.
10800 if self.op.repeat == 0:
10803 top_value = self.op.repeat - 1
10804 for i in range(self.op.repeat):
10805 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10809 class LUTestJobqueue(NoHooksLU):
10810 """Utility LU to test some aspects of the job queue.
10814 ("notify_waitlock", False, ht.TBool),
10815 ("notify_exec", False, ht.TBool),
10816 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10817 ("fail", False, ht.TBool),
10821 # Must be lower than default timeout for WaitForJobChange to see whether it
10822 # notices changed jobs
10823 _CLIENT_CONNECT_TIMEOUT = 20.0
10824 _CLIENT_CONFIRM_TIMEOUT = 60.0
10826 @classmethod
10827 def _NotifyUsingSocket(cls, cb, errcls):
10828 """Opens a Unix socket and waits for another program to connect.
10830 @type cb: callable
10831 @param cb: Callback to send socket name to client
10832 @type errcls: class
10833 @param errcls: Exception class to use for errors
10835 """
10836 # Using a temporary directory as there's no easy way to create temporary
10837 # sockets without writing a custom loop around tempfile.mktemp and
10838 # socket.bind
10839 tmpdir = tempfile.mkdtemp()
10840 try:
10841 tmpsock = utils.PathJoin(tmpdir, "sock")
10843 logging.debug("Creating temporary socket at %s", tmpsock)
10844 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10845 try:
10846 sock.bind(tmpsock)
10847 sock.listen(1)
10849 # Send details to client
10850 cb(tmpsock)
10852 # Wait for client to connect before continuing
10853 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10854 try:
10855 (conn, _) = sock.accept()
10856 except socket.error, err:
10857 raise errcls("Client didn't connect in time (%s)" % err)
10858 finally:
10859 sock.close()
10860 finally:
10861 # Remove as soon as client is connected
10862 shutil.rmtree(tmpdir)
10864 # Wait for client to close
10865 try:
10866 try:
10867 # pylint: disable-msg=E1101
10868 # Instance of '_socketobject' has no ... member
10869 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10870 conn.recv(1)
10871 except socket.error, err:
10872 raise errcls("Client failed to confirm notification (%s)" % err)
10873 finally:
10874 conn.close()
10876 def _SendNotification(self, test, arg, sockname):
10877 """Sends a notification to the client.
10879 @type test: string
10880 @param test: Test name
10881 @param arg: Test argument (depends on test)
10882 @type sockname: string
10883 @param sockname: Socket path
10885 """
10886 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10888 def _Notify(self, prereq, test, arg):
10889 """Notifies the client of a test.
10892 @param prereq: Whether this is a prereq-phase test
10894 @param test: Test name
10895 @param arg: Test argument (depends on test)
10899 errcls = errors.OpPrereqError
10901 errcls = errors.OpExecError
10903 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10907 def CheckArguments(self):
10908 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10909 self.expandnames_calls = 0
10911 def ExpandNames(self):
10912 checkargs_calls = getattr(self, "checkargs_calls", 0)
10913 if checkargs_calls < 1:
10914 raise errors.ProgrammerError("CheckArguments was not called")
10916 self.expandnames_calls += 1
10918 if self.op.notify_waitlock:
10919 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10921 self.LogInfo("Expanding names")
10923 # Get lock on master node (just to get a lock, not for a particular reason)
10924 self.needed_locks = {
10925 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10926 }
10928 def Exec(self, feedback_fn):
10929 if self.expandnames_calls < 1:
10930 raise errors.ProgrammerError("ExpandNames was not called")
10932 if self.op.notify_exec:
10933 self._Notify(False, constants.JQT_EXEC, None)
10935 self.LogInfo("Executing")
10937 if self.op.log_messages:
10938 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10939 for idx, msg in enumerate(self.op.log_messages):
10940 self.LogInfo("Sending log message %s", idx + 1)
10941 feedback_fn(constants.JQT_MSGPREFIX + msg)
10942 # Report how many test messages have been sent
10943 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10946 raise errors.OpExecError("Opcode failure was requested")
10951 class IAllocator(object):
10952 """IAllocator framework.
10954 An IAllocator instance has four sets of attributes:
10955 - cfg that is needed to query the cluster
10956 - input data (all members of the _KEYS class attribute are required)
10957 - four buffer attributes (in|out_data|text), that represent the
10958 input (to the external script) in text and data structure format,
10959 and the output from it, again in two formats
10960 - the result variables from the script (success, info, nodes) for
10961 easy usage
10963 """
10964 # pylint: disable-msg=R0902
10965 # lots of instance attributes
10967 "name", "mem_size", "disks", "disk_template",
10968 "os", "tags", "nics", "vcpus", "hypervisor",
10971 "name", "relocate_from",
10977 def __init__(self, cfg, rpc, mode, **kwargs):
10978 self.cfg = cfg
10979 self.rpc = rpc
10980 # init buffer variables
10981 self.in_text = self.out_text = self.in_data = self.out_data = None
10982 # init all input fields so that pylint is happy
10983 self.mode = mode
10984 self.mem_size = self.disks = self.disk_template = None
10985 self.os = self.tags = self.nics = self.vcpus = None
10986 self.hypervisor = None
10987 self.relocate_from = None
10988 self.name = None
10989 self.evac_nodes = None
10990 # computed fields
10991 self.required_nodes = None
10992 # init result fields
10993 self.success = self.info = self.result = None
10994 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10995 keyset = self._ALLO_KEYS
10996 fn = self._AddNewInstance
10997 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10998 keyset = self._RELO_KEYS
10999 fn = self._AddRelocateInstance
11000 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11001 keyset = self._EVAC_KEYS
11002 fn = self._AddEvacuateNodes
11004 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11005 " IAllocator" % self.mode)
11006 for key in kwargs:
11007 if key not in keyset:
11008 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11009 " IAllocator" % key)
11010 setattr(self, key, kwargs[key])
11012 for key in keyset:
11013 if key not in kwargs:
11014 raise errors.ProgrammerError("Missing input parameter '%s' to"
11015 " IAllocator" % key)
11016 self._BuildInputData(fn)
11018 def _ComputeClusterData(self):
11019 """Compute the generic allocator input data.
11021 This is the data that is independent of the actual operation.
11023 """
11024 cfg = self.cfg
11025 cluster_info = cfg.GetClusterInfo()
11028 "version": constants.IALLOCATOR_VERSION,
11029 "cluster_name": cfg.GetClusterName(),
11030 "cluster_tags": list(cluster_info.GetTags()),
11031 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11032 # we don't have job IDs
11034 iinfo = cfg.GetAllInstancesInfo().values()
11035 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11038 node_list = cfg.GetNodeList()
11040 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11041 hypervisor_name = self.hypervisor
11042 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11043 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11044 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11045 hypervisor_name = cluster_info.enabled_hypervisors[0]
11047 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11048 hypervisor_name)
11049 node_iinfo = \
11050 self.rpc.call_all_instances_info(node_list,
11051 cluster_info.enabled_hypervisors)
11053 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11055 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
11057 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11059 self.in_data = data
11061 @staticmethod
11062 def _ComputeNodeGroupData(cfg):
11063 """Compute node groups data.
11065 """
11066 ng = {}
11067 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11068 ng[guuid] = {
11069 "name": gdata.name,
11070 "alloc_policy": gdata.alloc_policy,
11071 }
11072 return ng
11074 @staticmethod
11075 def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
11076 """Compute global node data.
11078 """
11079 node_results = {}
11080 for nname, nresult in node_data.items():
11081 # first fill in static (config-based) values
11082 ninfo = cfg.GetNodeInfo(nname)
11083 pnr = {
11084 "tags": list(ninfo.GetTags()),
11085 "primary_ip": ninfo.primary_ip,
11086 "secondary_ip": ninfo.secondary_ip,
11087 "offline": ninfo.offline,
11088 "drained": ninfo.drained,
11089 "master_candidate": ninfo.master_candidate,
11090 "group": ninfo.group,
11091 "master_capable": ninfo.master_capable,
11092 "vm_capable": ninfo.vm_capable,
11093 }
11095 if not (ninfo.offline or ninfo.drained):
11096 nresult.Raise("Can't get data for node %s" % nname)
11097 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11098 nname)
11099 remote_info = nresult.payload
11101 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11102 'vg_size', 'vg_free', 'cpu_total']:
11103 if attr not in remote_info:
11104 raise errors.OpExecError("Node '%s' didn't return attribute"
11105 " '%s'" % (nname, attr))
11106 if not isinstance(remote_info[attr], int):
11107 raise errors.OpExecError("Node '%s' returned invalid value"
11108 " for '%s': %s" %
11109 (nname, attr, remote_info[attr]))
11110 # compute memory used by primary instances
11111 i_p_mem = i_p_up_mem = 0
11112 for iinfo, beinfo in i_list:
11113 if iinfo.primary_node == nname:
11114 i_p_mem += beinfo[constants.BE_MEMORY]
11115 if iinfo.name not in node_iinfo[nname].payload:
11116 i_used_mem = 0
11117 else:
11118 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11119 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11120 remote_info['memory_free'] -= max(0, i_mem_diff)
11122 if iinfo.admin_up:
11123 i_p_up_mem += beinfo[constants.BE_MEMORY]
11125 # compute memory used by instances
11127 "total_memory": remote_info['memory_total'],
11128 "reserved_memory": remote_info['memory_dom0'],
11129 "free_memory": remote_info['memory_free'],
11130 "total_disk": remote_info['vg_size'],
11131 "free_disk": remote_info['vg_free'],
11132 "total_cpus": remote_info['cpu_total'],
11133 "i_pri_memory": i_p_mem,
11134 "i_pri_up_memory": i_p_up_mem,
11136 pnr.update(pnr_dyn)
11138 node_results[nname] = pnr
11140 return node_results
11142 @staticmethod
11143 def _ComputeInstanceData(cluster_info, i_list):
11144 """Compute global instance data.
11146 """
11147 instance_data = {}
11148 for iinfo, beinfo in i_list:
11149 nic_data = []
11150 for nic in iinfo.nics:
11151 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11152 nic_dict = {"mac": nic.mac,
11153 "ip": nic.ip,
11154 "mode": filled_params[constants.NIC_MODE],
11155 "link": filled_params[constants.NIC_LINK],
11156 }
11157 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11158 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11159 nic_data.append(nic_dict)
11161 "tags": list(iinfo.GetTags()),
11162 "admin_up": iinfo.admin_up,
11163 "vcpus": beinfo[constants.BE_VCPUS],
11164 "memory": beinfo[constants.BE_MEMORY],
11166 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11168 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11169 "disk_template": iinfo.disk_template,
11170 "hypervisor": iinfo.hypervisor,
11172 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11174 instance_data[iinfo.name] = pir
11176 return instance_data
11178 def _AddNewInstance(self):
11179 """Add new instance data to allocator structure.
11181 This in combination with _AllocatorGetClusterData will create the
11182 correct structure needed as input for the allocator.
11184 The checks for the completeness of the opcode must have already been
11185 done.
11187 """
11188 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11190 if self.disk_template in constants.DTS_NET_MIRROR:
11191 self.required_nodes = 2
11192 else:
11193 self.required_nodes = 1
11196 "disk_template": self.disk_template,
11199 "vcpus": self.vcpus,
11200 "memory": self.mem_size,
11201 "disks": self.disks,
11202 "disk_space_total": disk_space,
11204 "required_nodes": self.required_nodes,
11208 def _AddRelocateInstance(self):
11209 """Add relocate instance data to allocator structure.
11211 This in combination with _IAllocatorGetClusterData will create the
11212 correct structure needed as input for the allocator.
11214 The checks for the completeness of the opcode must have already been
11215 done.
11217 """
11218 instance = self.cfg.GetInstanceInfo(self.name)
11219 if instance is None:
11220 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11221 " IAllocator" % self.name)
11223 if instance.disk_template not in constants.DTS_NET_MIRROR:
11224 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11225 errors.ECODE_INVAL)
11227 if len(instance.secondary_nodes) != 1:
11228 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11229 errors.ECODE_STATE)
11231 self.required_nodes = 1
11232 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11233 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11237 "disk_space_total": disk_space,
11238 "required_nodes": self.required_nodes,
11239 "relocate_from": self.relocate_from,
11243 def _AddEvacuateNodes(self):
11244 """Add evacuate nodes data to allocator structure.
11248 "evac_nodes": self.evac_nodes
11252 def _BuildInputData(self, fn):
11253 """Build input data structures.
11256 self._ComputeClusterData()
11259 request["type"] = self.mode
11260 self.in_data["request"] = request
11262 self.in_text = serializer.Dump(self.in_data)
11264 def Run(self, name, validate=True, call_fn=None):
11265 """Run an instance allocator and return the results.
11268 if call_fn is None:
11269 call_fn = self.rpc.call_iallocator_runner
11271 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11272 result.Raise("Failure while running the iallocator script")
11274 self.out_text = result.payload
11275 if validate:
11276 self._ValidateResult()
11278 def _ValidateResult(self):
11279 """Process the allocator results.
11281 This will process and if successful save the result in
11282 self.out_data and the other parameters.
11284 """
11285 try:
11286 rdict = serializer.Load(self.out_text)
11287 except Exception, err:
11288 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11290 if not isinstance(rdict, dict):
11291 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11293 # TODO: remove backwards compatibility in later versions
11294 if "nodes" in rdict and "result" not in rdict:
11295 rdict["result"] = rdict["nodes"]
11296 del rdict["nodes"]
11298 for key in "success", "info", "result":
11299 if key not in rdict:
11300 raise errors.OpExecError("Can't parse iallocator results:"
11301 " missing key '%s'" % key)
11302 setattr(self, key, rdict[key])
11304 if not isinstance(rdict["result"], list):
11305 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11306 " is not a list")
11307 self.out_data = rdict
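# A well-formed allocator reply, as validated above, looks like this
# sketch (hypothetical nodes):
# {"success": true, "info": "allocation successful",
# "result": ["node1.example.com", "node2.example.com"]}
# "result" used to be called "nodes"; the fallback above keeps replies
# from older scripts working.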
11310 class LUTestAllocator(NoHooksLU):
11311 """Run allocator tests.
11313 This LU runs the allocator tests
11315 """
11316 _OP_PARAMS = [
11317 ("direction", ht.NoDefault,
11318 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
11319 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
11320 ("name", ht.NoDefault, ht.TNonEmptyString),
11321 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
11322 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
11323 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
11324 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
11325 ("hypervisor", None, ht.TMaybeString),
11326 ("allocator", None, ht.TMaybeString),
11327 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
11328 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11329 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
11330 ("os", None, ht.TMaybeString),
11331 ("disk_template", None, ht.TMaybeString),
11332 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
11335 def CheckPrereq(self):
11336 """Check prerequisites.
11338 This checks the opcode parameters depending on the direction and mode test.
11340 """
11341 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11342 for attr in ["mem_size", "disks", "disk_template",
11343 "os", "tags", "nics", "vcpus"]:
11344 if not hasattr(self.op, attr):
11345 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11346 attr, errors.ECODE_INVAL)
11347 iname = self.cfg.ExpandInstanceName(self.op.name)
11348 if iname is not None:
11349 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11350 iname, errors.ECODE_EXISTS)
11351 if not isinstance(self.op.nics, list):
11352 raise errors.OpPrereqError("Invalid parameter 'nics'",
11353 errors.ECODE_INVAL)
11354 if not isinstance(self.op.disks, list):
11355 raise errors.OpPrereqError("Invalid parameter 'disks'",
11356 errors.ECODE_INVAL)
11357 for row in self.op.disks:
11358 if (not isinstance(row, dict) or
11359 "size" not in row or
11360 not isinstance(row["size"], int) or
11361 "mode" not in row or
11362 row["mode"] not in ['r', 'w']):
11363 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11364 " parameter", errors.ECODE_INVAL)
11365 if self.op.hypervisor is None:
11366 self.op.hypervisor = self.cfg.GetHypervisorType()
11367 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11368 fname = _ExpandInstanceName(self.cfg, self.op.name)
11369 self.op.name = fname
11370 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11371 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11372 if not hasattr(self.op, "evac_nodes"):
11373 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11374 " opcode input", errors.ECODE_INVAL)
11376 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11377 self.op.mode, errors.ECODE_INVAL)
11379 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11380 if self.op.allocator is None:
11381 raise errors.OpPrereqError("Missing allocator name",
11382 errors.ECODE_INVAL)
11383 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11384 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11385 self.op.direction, errors.ECODE_INVAL)
11387 def Exec(self, feedback_fn):
11388 """Run the allocator test.
11391 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11392 ial = IAllocator(self.cfg, self.rpc,
11393 mode=self.op.mode,
11394 name=self.op.name,
11395 mem_size=self.op.mem_size,
11396 disks=self.op.disks,
11397 disk_template=self.op.disk_template,
11398 os=self.op.os,
11399 tags=self.op.tags,
11400 nics=self.op.nics,
11401 vcpus=self.op.vcpus,
11402 hypervisor=self.op.hypervisor,
11403 )
11404 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11405 ial = IAllocator(self.cfg, self.rpc,
11406 mode=self.op.mode,
11407 name=self.op.name,
11408 relocate_from=list(self.relocate_from),
11409 )
11410 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11411 ial = IAllocator(self.cfg, self.rpc,
11412 mode=self.op.mode,
11413 evac_nodes=self.op.evac_nodes)
11415 raise errors.ProgrammerError("Uncatched mode %s in"
11416 " LUTestAllocator.Exec", self.op.mode)
11418 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11419 result = ial.in_text
11420 else:
11421 ial.Run(self.op.allocator, validate=False)
11422 result = ial.out_text
11423 return result