code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56
  57 import ganeti.masterd.instance # pylint: disable-msg=W0611
  58
  59
  60 # Modifiable default values; need to define these here before the
  61 # actual LUs
  62
  63 def _EmptyList():
  64   """Returns an empty list.
  65
  66   """
  67   return []
  68
  69
  70 def _EmptyDict():
  71   """Returns an empty dict.
  72
  73   """
  74   return {}
  75
  76
  77 #: The without-default default value
  78 _NoDefault = object()
  79
  80
  81 #: The no-type (value to complex to check it in the type system)
  82 _NoType = object()
  83
  84
  85 # Some basic types
  86 def _TNotNone(val):
  87   """Checks if the given value is not None.
  88
  89   """
  90   return val is not None
  91
  92
  93 def _TNone(val):
  94   """Checks if the given value is None.
  95
  96   """
  97   return val is None
  98
  99
 100 def _TBool(val):
 101   """Checks if the given value is a boolean.
 102
 103   """
 104   return isinstance(val, bool)
 105
 106
 107 def _TInt(val):
 108   """Checks if the given value is an integer.
 109
 110   """
 111   return isinstance(val, int)
 112
 113
 114 def _TFloat(val):
 115   """Checks if the given value is a float.
 116
 117   """
 118   return isinstance(val, float)
 119
 120
 121 def _TString(val):
 122   """Checks if the given value is a string.
 123
 124   """
 125   return isinstance(val, basestring)
 126
 127
 128 def _TTrue(val):
 129   """Checks if a given value evaluates to a boolean True value.
 130
 131   """
 132   return bool(val)
 133
 134
 135 def _TElemOf(target_list):
 136   """Builds a function that checks if a given value is a member of a list.
 137
 138   """
 139   return lambda val: val in target_list
 140
 141
 142 # Container types
 143 def _TList(val):
 144   """Checks if the given value is a list.
 145
 146   """
 147   return isinstance(val, list)
 148
 149
 150 def _TDict(val):
 151   """Checks if the given value is a dictionary.
 152
 153   """
 154   return isinstance(val, dict)
 155
 156
 157 # Combinator types
 158 def _TAnd(*args):
 159   """Combine multiple functions using an AND operation.
 160
 161   """
 162   def fn(val):
 163     return compat.all(t(val) for t in args)
 164   return fn
 165
 166
 167 def _TOr(*args):
 168   """Combine multiple functions using an AND operation.
 169
 170   """
 171   def fn(val):
 172     return compat.any(t(val) for t in args)
 173   return fn
 174
 175
 176 # Type aliases
 177
 178 #: a non-empty string
 179 _TNonEmptyString = _TAnd(_TString, _TTrue)
 180
 181
 182 #: a maybe non-empty string
 183 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
 184
 185
 186 #: a maybe boolean (bool or none)
 187 _TMaybeBool = _TOr(_TBool, _TNone)
 188
 189
 190 #: a positive integer
 191 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
 192
 193 #: a strictly positive integer
 194 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
 195
 196
 197 def _TListOf(my_type):
 198   """Checks if a given value is a list with all elements of the same type.
 199
 200   """
 201   return _TAnd(_TList,
 202                lambda lst: compat.all(my_type(v) for v in lst))
 203
 204
 205 def _TDictOf(key_type, val_type):
 206   """Checks a dict type for the type of its key/values.
 207
 208   """
 209   return _TAnd(_TDict,
 210                lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
 211                                 and compat.all(val_type(v)
 212                                                for v in my_dict.values())))
 213
 214
 215 # Common opcode attributes
 216
 217 #: output fields for a query operation
 218 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
 219
 220
 221 #: the shutdown timeout
 222 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
 223                      _TPositiveInt)
 224
 225 #: the force parameter
 226 _PForce = ("force", False, _TBool)
 227
 228 #: a required instance name (for single-instance LUs)
 229 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
 230
 231
 232 #: a required node name (for single-node LUs)
 233 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
 234
 235 #: the migration type (live/non-live)
 236 _PMigrationMode = ("mode", None, _TOr(_TNone,
 237                                       _TElemOf(constants.HT_MIGRATION_MODES)))
 238
 239 #: the obsolete 'live' mode (boolean)
 240 _PMigrationLive = ("live", None, _TMaybeBool)
 241
 242
 243 # End types
 244 class LogicalUnit(object):
 245   """Logical Unit base class.
 246
 247   Subclasses must follow these rules:
 248     - implement ExpandNames
 249     - implement CheckPrereq (except when tasklets are used)
 250     - implement Exec (except when tasklets are used)
 251     - implement BuildHooksEnv
 252     - redefine HPATH and HTYPE
 253     - optionally redefine their run requirements:
 254         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 255
 256   Note that all commands require root permissions.
 257
 258   @ivar dry_run_result: the value (if any) that will be returned to the caller
 259       in dry-run mode (signalled by opcode dry_run parameter)
 260   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 261       they should get if not already defined, and types they must match
 262
 263   """
 264   HPATH = None
 265   HTYPE = None
 266   _OP_PARAMS = []
 267   REQ_BGL = True
 268
 269   def __init__(self, processor, op, context, rpc):
 270     """Constructor for LogicalUnit.
 271
 272     This needs to be overridden in derived classes in order to check op
 273     validity.
 274
 275     """
 276     self.proc = processor
 277     self.op = op
 278     self.cfg = context.cfg
 279     self.context = context
 280     self.rpc = rpc
 281     # Dicts used to declare locking needs to mcpu
 282     self.needed_locks = None
 283     self.acquired_locks = {}
 284     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 285     self.add_locks = {}
 286     self.remove_locks = {}
 287     # Used to force good behavior when calling helper functions
 288     self.recalculate_locks = {}
 289     self.__ssh = None
 290     # logging
 291     self.Log = processor.Log # pylint: disable-msg=C0103
 292     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 293     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 294     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 295     # support for dry-run
 296     self.dry_run_result = None
 297     # support for generic debug attribute
 298     if (not hasattr(self.op, "debug_level") or
 299         not isinstance(self.op.debug_level, int)):
 300       self.op.debug_level = 0
 301
 302     # Tasklets
 303     self.tasklets = None
 304
 305     # The new kind-of-type-system
 306     op_id = self.op.OP_ID
 307     for attr_name, aval, test in self._OP_PARAMS:
 308       if not hasattr(op, attr_name):
 309         if aval == _NoDefault:
 310           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 311                                      (op_id, attr_name), errors.ECODE_INVAL)
 312         else:
 313           if callable(aval):
 314             dval = aval()
 315           else:
 316             dval = aval
 317           setattr(self.op, attr_name, dval)
 318       attr_val = getattr(op, attr_name)
 319       if test == _NoType:
 320         # no tests here
 321         continue
 322       if not callable(test):
 323         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 324                                      " given type is not a proper type (%s)" %
 325                                      (op_id, attr_name, test))
 326       if not test(attr_val):
 327         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 328                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 329         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 330                                    (op_id, attr_name), errors.ECODE_INVAL)
 331
 332     self.CheckArguments()
 333
 334   def __GetSSH(self):
 335     """Returns the SshRunner object
 336
 337     """
 338     if not self.__ssh:
 339       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 340     return self.__ssh
 341
 342   ssh = property(fget=__GetSSH)
 343
 344   def CheckArguments(self):
 345     """Check syntactic validity for the opcode arguments.
 346
 347     This method is for doing a simple syntactic check and ensure
 348     validity of opcode parameters, without any cluster-related
 349     checks. While the same can be accomplished in ExpandNames and/or
 350     CheckPrereq, doing these separate is better because:
 351
 352       - ExpandNames is left as as purely a lock-related function
 353       - CheckPrereq is run after we have acquired locks (and possible
 354         waited for them)
 355
 356     The function is allowed to change the self.op attribute so that
 357     later methods can no longer worry about missing parameters.
 358
 359     """
 360     pass
 361
 362   def ExpandNames(self):
 363     """Expand names for this LU.
 364
 365     This method is called before starting to execute the opcode, and it should
 366     update all the parameters of the opcode to their canonical form (e.g. a
 367     short node name must be fully expanded after this method has successfully
 368     completed). This way locking, hooks, logging, ecc. can work correctly.
 369
 370     LUs which implement this method must also populate the self.needed_locks
 371     member, as a dict with lock levels as keys, and a list of needed lock names
 372     as values. Rules:
 373
 374       - use an empty dict if you don't need any lock
 375       - if you don't need any lock at a particular level omit that level
 376       - don't put anything for the BGL level
 377       - if you want all locks at a level use locking.ALL_SET as a value
 378
 379     If you need to share locks (rather than acquire them exclusively) at one
 380     level you can modify self.share_locks, setting a true value (usually 1) for
 381     that level. By default locks are not shared.
 382
 383     This function can also define a list of tasklets, which then will be
 384     executed in order instead of the usual LU-level CheckPrereq and Exec
 385     functions, if those are not defined by the LU.
 386
 387     Examples::
 388
 389       # Acquire all nodes and one instance
 390       self.needed_locks = {
 391         locking.LEVEL_NODE: locking.ALL_SET,
 392         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 393       }
 394       # Acquire just two nodes
 395       self.needed_locks = {
 396         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 397       }
 398       # Acquire no locks
 399       self.needed_locks = {} # No, you can't leave it to the default value None
 400
 401     """
 402     # The implementation of this method is mandatory only if the new LU is
 403     # concurrent, so that old LUs don't need to be changed all at the same
 404     # time.
 405     if self.REQ_BGL:
 406       self.needed_locks = {} # Exclusive LUs don't need locks.
 407     else:
 408       raise NotImplementedError
 409
 410   def DeclareLocks(self, level):
 411     """Declare LU locking needs for a level
 412
 413     While most LUs can just declare their locking needs at ExpandNames time,
 414     sometimes there's the need to calculate some locks after having acquired
 415     the ones before. This function is called just before acquiring locks at a
 416     particular level, but after acquiring the ones at lower levels, and permits
 417     such calculations. It can be used to modify self.needed_locks, and by
 418     default it does nothing.
 419
 420     This function is only called if you have something already set in
 421     self.needed_locks for the level.
 422
 423     @param level: Locking level which is going to be locked
 424     @type level: member of ganeti.locking.LEVELS
 425
 426     """
 427
 428   def CheckPrereq(self):
 429     """Check prerequisites for this LU.
 430
 431     This method should check that the prerequisites for the execution
 432     of this LU are fulfilled. It can do internode communication, but
 433     it should be idempotent - no cluster or system changes are
 434     allowed.
 435
 436     The method should raise errors.OpPrereqError in case something is
 437     not fulfilled. Its return value is ignored.
 438
 439     This method should also update all the parameters of the opcode to
 440     their canonical form if it hasn't been done by ExpandNames before.
 441
 442     """
 443     if self.tasklets is not None:
 444       for (idx, tl) in enumerate(self.tasklets):
 445         logging.debug("Checking prerequisites for tasklet %s/%s",
 446                       idx + 1, len(self.tasklets))
 447         tl.CheckPrereq()
 448     else:
 449       pass
 450
 451   def Exec(self, feedback_fn):
 452     """Execute the LU.
 453
 454     This method should implement the actual work. It should raise
 455     errors.OpExecError for failures that are somewhat dealt with in
 456     code, or expected.
 457
 458     """
 459     if self.tasklets is not None:
 460       for (idx, tl) in enumerate(self.tasklets):
 461         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 462         tl.Exec(feedback_fn)
 463     else:
 464       raise NotImplementedError
 465
 466   def BuildHooksEnv(self):
 467     """Build hooks environment for this LU.
 468
 469     This method should return a three-node tuple consisting of: a dict
 470     containing the environment that will be used for running the
 471     specific hook for this LU, a list of node names on which the hook
 472     should run before the execution, and a list of node names on which
 473     the hook should run after the execution.
 474
 475     The keys of the dict must not have 'GANETI_' prefixed as this will
 476     be handled in the hooks runner. Also note additional keys will be
 477     added by the hooks runner. If the LU doesn't define any
 478     environment, an empty dict (and not None) should be returned.
 479
 480     No nodes should be returned as an empty list (and not None).
 481
 482     Note that if the HPATH for a LU class is None, this function will
 483     not be called.
 484
 485     """
 486     raise NotImplementedError
 487
 488   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 489     """Notify the LU about the results of its hooks.
 490
 491     This method is called every time a hooks phase is executed, and notifies
 492     the Logical Unit about the hooks' result. The LU can then use it to alter
 493     its result based on the hooks.  By default the method does nothing and the
 494     previous result is passed back unchanged but any LU can define it if it
 495     wants to use the local cluster hook-scripts somehow.
 496
 497     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 498         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 499     @param hook_results: the results of the multi-node hooks rpc call
 500     @param feedback_fn: function used send feedback back to the caller
 501     @param lu_result: the previous Exec result this LU had, or None
 502         in the PRE phase
 503     @return: the new Exec result, based on the previous result
 504         and hook results
 505
 506     """
 507     # API must be kept, thus we ignore the unused argument and could
 508     # be a function warnings
 509     # pylint: disable-msg=W0613,R0201
 510     return lu_result
 511
 512   def _ExpandAndLockInstance(self):
 513     """Helper function to expand and lock an instance.
 514
 515     Many LUs that work on an instance take its name in self.op.instance_name
 516     and need to expand it and then declare the expanded name for locking. This
 517     function does it, and then updates self.op.instance_name to the expanded
 518     name. It also initializes needed_locks as a dict, if this hasn't been done
 519     before.
 520
 521     """
 522     if self.needed_locks is None:
 523       self.needed_locks = {}
 524     else:
 525       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 526         "_ExpandAndLockInstance called with instance-level locks set"
 527     self.op.instance_name = _ExpandInstanceName(self.cfg,
 528                                                 self.op.instance_name)
 529     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 530
 531   def _LockInstancesNodes(self, primary_only=False):
 532     """Helper function to declare instances' nodes for locking.
 533
 534     This function should be called after locking one or more instances to lock
 535     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 536     with all primary or secondary nodes for instances already locked and
 537     present in self.needed_locks[locking.LEVEL_INSTANCE].
 538
 539     It should be called from DeclareLocks, and for safety only works if
 540     self.recalculate_locks[locking.LEVEL_NODE] is set.
 541
 542     In the future it may grow parameters to just lock some instance's nodes, or
 543     to just lock primaries or secondary nodes, if needed.
 544
 545     If should be called in DeclareLocks in a way similar to::
 546
 547       if level == locking.LEVEL_NODE:
 548         self._LockInstancesNodes()
 549
 550     @type primary_only: boolean
 551     @param primary_only: only lock primary nodes of locked instances
 552
 553     """
 554     assert locking.LEVEL_NODE in self.recalculate_locks, \
 555       "_LockInstancesNodes helper function called with no nodes to recalculate"
 556
 557     # TODO: check if we're really been called with the instance locks held
 558
 559     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 560     # future we might want to have different behaviors depending on the value
 561     # of self.recalculate_locks[locking.LEVEL_NODE]
 562     wanted_nodes = []
 563     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 564       instance = self.context.cfg.GetInstanceInfo(instance_name)
 565       wanted_nodes.append(instance.primary_node)
 566       if not primary_only:
 567         wanted_nodes.extend(instance.secondary_nodes)
 568
 569     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 570       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 571     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 572       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 573
 574     del self.recalculate_locks[locking.LEVEL_NODE]
 575
 576
 577 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 578   """Simple LU which runs no hooks.
 579
 580   This LU is intended as a parent for other LogicalUnits which will
 581   run no hooks, in order to reduce duplicate code.
 582
 583   """
 584   HPATH = None
 585   HTYPE = None
 586
 587   def BuildHooksEnv(self):
 588     """Empty BuildHooksEnv for NoHooksLu.
 589
 590     This just raises an error.
 591
 592     """
 593     assert False, "BuildHooksEnv called for NoHooksLUs"
 594
 595
 596 class Tasklet:
 597   """Tasklet base class.
 598
 599   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 600   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 601   tasklets know nothing about locks.
 602
 603   Subclasses must follow these rules:
 604     - Implement CheckPrereq
 605     - Implement Exec
 606
 607   """
 608   def __init__(self, lu):
 609     self.lu = lu
 610
 611     # Shortcuts
 612     self.cfg = lu.cfg
 613     self.rpc = lu.rpc
 614
 615   def CheckPrereq(self):
 616     """Check prerequisites for this tasklets.
 617
 618     This method should check whether the prerequisites for the execution of
 619     this tasklet are fulfilled. It can do internode communication, but it
 620     should be idempotent - no cluster or system changes are allowed.
 621
 622     The method should raise errors.OpPrereqError in case something is not
 623     fulfilled. Its return value is ignored.
 624
 625     This method should also update all parameters to their canonical form if it
 626     hasn't been done before.
 627
 628     """
 629     pass
 630
 631   def Exec(self, feedback_fn):
 632     """Execute the tasklet.
 633
 634     This method should implement the actual work. It should raise
 635     errors.OpExecError for failures that are somewhat dealt with in code, or
 636     expected.
 637
 638     """
 639     raise NotImplementedError
 640
 641
 642 def _GetWantedNodes(lu, nodes):
 643   """Returns list of checked and expanded node names.
 644
 645   @type lu: L{LogicalUnit}
 646   @param lu: the logical unit on whose behalf we execute
 647   @type nodes: list
 648   @param nodes: list of node names or None for all nodes
 649   @rtype: list
 650   @return: the list of nodes, sorted
 651   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 652
 653   """
 654   if not nodes:
 655     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 656       " non-empty list of nodes whose name is to be expanded.")
 657
 658   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 659   return utils.NiceSort(wanted)
 660
 661
 662 def _GetWantedInstances(lu, instances):
 663   """Returns list of checked and expanded instance names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type instances: list
 668   @param instances: list of instance names or None for all instances
 669   @rtype: list
 670   @return: the list of instances, sorted
 671   @raise errors.OpPrereqError: if the instances parameter is wrong type
 672   @raise errors.OpPrereqError: if any of the passed instances is not found
 673
 674   """
 675   if instances:
 676     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 677   else:
 678     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 679   return wanted
 680
 681
 682 def _GetUpdatedParams(old_params, update_dict,
 683                       use_default=True, use_none=False):
 684   """Return the new version of a parameter dictionary.
 685
 686   @type old_params: dict
 687   @param old_params: old parameters
 688   @type update_dict: dict
 689   @param update_dict: dict containing new parameter values, or
 690       constants.VALUE_DEFAULT to reset the parameter to its default
 691       value
 692   @param use_default: boolean
 693   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 694       values as 'to be deleted' values
 695   @param use_none: boolean
 696   @type use_none: whether to recognise C{None} values as 'to be
 697       deleted' values
 698   @rtype: dict
 699   @return: the new parameter dictionary
 700
 701   """
 702   params_copy = copy.deepcopy(old_params)
 703   for key, val in update_dict.iteritems():
 704     if ((use_default and val == constants.VALUE_DEFAULT) or
 705         (use_none and val is None)):
 706       try:
 707         del params_copy[key]
 708       except KeyError:
 709         pass
 710     else:
 711       params_copy[key] = val
 712   return params_copy
 713
 714
 715 def _CheckOutputFields(static, dynamic, selected):
 716   """Checks whether all selected fields are valid.
 717
 718   @type static: L{utils.FieldSet}
 719   @param static: static fields set
 720   @type dynamic: L{utils.FieldSet}
 721   @param dynamic: dynamic fields set
 722
 723   """
 724   f = utils.FieldSet()
 725   f.Extend(static)
 726   f.Extend(dynamic)
 727
 728   delta = f.NonMatching(selected)
 729   if delta:
 730     raise errors.OpPrereqError("Unknown output fields selected: %s"
 731                                % ",".join(delta), errors.ECODE_INVAL)
 732
 733
 734 def _CheckGlobalHvParams(params):
 735   """Validates that given hypervisor params are not global ones.
 736
 737   This will ensure that instances don't get customised versions of
 738   global params.
 739
 740   """
 741   used_globals = constants.HVC_GLOBALS.intersection(params)
 742   if used_globals:
 743     msg = ("The following hypervisor parameters are global and cannot"
 744            " be customized at instance level, please modify them at"
 745            " cluster level: %s" % utils.CommaJoin(used_globals))
 746     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 747
 748
 749 def _CheckNodeOnline(lu, node):
 750   """Ensure that a given node is online.
 751
 752   @param lu: the LU on behalf of which we make the check
 753   @param node: the node to check
 754   @raise errors.OpPrereqError: if the node is offline
 755
 756   """
 757   if lu.cfg.GetNodeInfo(node).offline:
 758     raise errors.OpPrereqError("Can't use offline node %s" % node,
 759                                errors.ECODE_INVAL)
 760
 761
 762 def _CheckNodeNotDrained(lu, node):
 763   """Ensure that a given node is not drained.
 764
 765   @param lu: the LU on behalf of which we make the check
 766   @param node: the node to check
 767   @raise errors.OpPrereqError: if the node is drained
 768
 769   """
 770   if lu.cfg.GetNodeInfo(node).drained:
 771     raise errors.OpPrereqError("Can't use drained node %s" % node,
 772                                errors.ECODE_INVAL)
 773
 774
 775 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 776   """Ensure that a node supports a given OS.
 777
 778   @param lu: the LU on behalf of which we make the check
 779   @param node: the node to check
 780   @param os_name: the OS to query about
 781   @param force_variant: whether to ignore variant errors
 782   @raise errors.OpPrereqError: if the node is not supporting the OS
 783
 784   """
 785   result = lu.rpc.call_os_get(node, os_name)
 786   result.Raise("OS '%s' not in supported OS list for node %s" %
 787                (os_name, node),
 788                prereq=True, ecode=errors.ECODE_INVAL)
 789   if not force_variant:
 790     _CheckOSVariant(result.payload, os_name)
 791
 792
 793 def _RequireFileStorage():
 794   """Checks that file storage is enabled.
 795
 796   @raise errors.OpPrereqError: when file storage is disabled
 797
 798   """
 799   if not constants.ENABLE_FILE_STORAGE:
 800     raise errors.OpPrereqError("File storage disabled at configure time",
 801                                errors.ECODE_INVAL)
 802
 803
 804 def _CheckDiskTemplate(template):
 805   """Ensure a given disk template is valid.
 806
 807   """
 808   if template not in constants.DISK_TEMPLATES:
 809     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 810            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 811     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 812   if template == constants.DT_FILE:
 813     _RequireFileStorage()
 814   return True
 815
 816
 817 def _CheckStorageType(storage_type):
 818   """Ensure a given storage type is valid.
 819
 820   """
 821   if storage_type not in constants.VALID_STORAGE_TYPES:
 822     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 823                                errors.ECODE_INVAL)
 824   if storage_type == constants.ST_FILE:
 825     _RequireFileStorage()
 826   return True
 827
 828
 829 def _GetClusterDomainSecret():
 830   """Reads the cluster domain secret.
 831
 832   """
 833   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 834                                strict=True)
 835
 836
 837 def _CheckInstanceDown(lu, instance, reason):
 838   """Ensure that an instance is not running."""
 839   if instance.admin_up:
 840     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 841                                (instance.name, reason), errors.ECODE_STATE)
 842
 843   pnode = instance.primary_node
 844   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 845   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 846               prereq=True, ecode=errors.ECODE_ENVIRON)
 847
 848   if instance.name in ins_l.payload:
 849     raise errors.OpPrereqError("Instance %s is running, %s" %
 850                                (instance.name, reason), errors.ECODE_STATE)
 851
 852
 853 def _ExpandItemName(fn, name, kind):
 854   """Expand an item name.
 855
 856   @param fn: the function to use for expansion
 857   @param name: requested item name
 858   @param kind: text description ('Node' or 'Instance')
 859   @return: the resolved (full) name
 860   @raise errors.OpPrereqError: if the item is not found
 861
 862   """
 863   full_name = fn(name)
 864   if full_name is None:
 865     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 866                                errors.ECODE_NOENT)
 867   return full_name
 868
 869
 870 def _ExpandNodeName(cfg, name):
 871   """Wrapper over L{_ExpandItemName} for nodes."""
 872   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 873
 874
 875 def _ExpandInstanceName(cfg, name):
 876   """Wrapper over L{_ExpandItemName} for instance."""
 877   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 878
 879
 880 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 881                           memory, vcpus, nics, disk_template, disks,
 882                           bep, hvp, hypervisor_name):
 883   """Builds instance related env variables for hooks
 884
 885   This builds the hook environment from individual variables.
 886
 887   @type name: string
 888   @param name: the name of the instance
 889   @type primary_node: string
 890   @param primary_node: the name of the instance's primary node
 891   @type secondary_nodes: list
 892   @param secondary_nodes: list of secondary nodes as strings
 893   @type os_type: string
 894   @param os_type: the name of the instance's OS
 895   @type status: boolean
 896   @param status: the should_run status of the instance
 897   @type memory: string
 898   @param memory: the memory size of the instance
 899   @type vcpus: string
 900   @param vcpus: the count of VCPUs the instance has
 901   @type nics: list
 902   @param nics: list of tuples (ip, mac, mode, link) representing
 903       the NICs the instance has
 904   @type disk_template: string
 905   @param disk_template: the disk template of the instance
 906   @type disks: list
 907   @param disks: the list of (size, mode) pairs
 908   @type bep: dict
 909   @param bep: the backend parameters for the instance
 910   @type hvp: dict
 911   @param hvp: the hypervisor parameters for the instance
 912   @type hypervisor_name: string
 913   @param hypervisor_name: the hypervisor for the instance
 914   @rtype: dict
 915   @return: the hook environment for this instance
 916
 917   """
 918   if status:
 919     str_status = "up"
 920   else:
 921     str_status = "down"
 922   env = {
 923     "OP_TARGET": name,
 924     "INSTANCE_NAME": name,
 925     "INSTANCE_PRIMARY": primary_node,
 926     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 927     "INSTANCE_OS_TYPE": os_type,
 928     "INSTANCE_STATUS": str_status,
 929     "INSTANCE_MEMORY": memory,
 930     "INSTANCE_VCPUS": vcpus,
 931     "INSTANCE_DISK_TEMPLATE": disk_template,
 932     "INSTANCE_HYPERVISOR": hypervisor_name,
 933   }
 934
 935   if nics:
 936     nic_count = len(nics)
 937     for idx, (ip, mac, mode, link) in enumerate(nics):
 938       if ip is None:
 939         ip = ""
 940       env["INSTANCE_NIC%d_IP" % idx] = ip
 941       env["INSTANCE_NIC%d_MAC" % idx] = mac
 942       env["INSTANCE_NIC%d_MODE" % idx] = mode
 943       env["INSTANCE_NIC%d_LINK" % idx] = link
 944       if mode == constants.NIC_MODE_BRIDGED:
 945         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 946   else:
 947     nic_count = 0
 948
 949   env["INSTANCE_NIC_COUNT"] = nic_count
 950
 951   if disks:
 952     disk_count = len(disks)
 953     for idx, (size, mode) in enumerate(disks):
 954       env["INSTANCE_DISK%d_SIZE" % idx] = size
 955       env["INSTANCE_DISK%d_MODE" % idx] = mode
 956   else:
 957     disk_count = 0
 958
 959   env["INSTANCE_DISK_COUNT"] = disk_count
 960
 961   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 962     for key, value in source.items():
 963       env["INSTANCE_%s_%s" % (kind, key)] = value
 964
 965   return env
 966
 967
 968 def _NICListToTuple(lu, nics):
 969   """Build a list of nic information tuples.
 970
 971   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 972   value in LUQueryInstanceData.
 973
 974   @type lu:  L{LogicalUnit}
 975   @param lu: the logical unit on whose behalf we execute
 976   @type nics: list of L{objects.NIC}
 977   @param nics: list of nics to convert to hooks tuples
 978
 979   """
 980   hooks_nics = []
 981   cluster = lu.cfg.GetClusterInfo()
 982   for nic in nics:
 983     ip = nic.ip
 984     mac = nic.mac
 985     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 986     mode = filled_params[constants.NIC_MODE]
 987     link = filled_params[constants.NIC_LINK]
 988     hooks_nics.append((ip, mac, mode, link))
 989   return hooks_nics
 990
 991
 992 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 993   """Builds instance related env variables for hooks from an object.
 994
 995   @type lu: L{LogicalUnit}
 996   @param lu: the logical unit on whose behalf we execute
 997   @type instance: L{objects.Instance}
 998   @param instance: the instance for which we should build the
 999       environment
1000   @type override: dict
1001   @param override: dictionary with key/values that will override
1002       our values
1003   @rtype: dict
1004   @return: the hook environment dictionary
1005
1006   """
1007   cluster = lu.cfg.GetClusterInfo()
1008   bep = cluster.FillBE(instance)
1009   hvp = cluster.FillHV(instance)
1010   args = {
1011     'name': instance.name,
1012     'primary_node': instance.primary_node,
1013     'secondary_nodes': instance.secondary_nodes,
1014     'os_type': instance.os,
1015     'status': instance.admin_up,
1016     'memory': bep[constants.BE_MEMORY],
1017     'vcpus': bep[constants.BE_VCPUS],
1018     'nics': _NICListToTuple(lu, instance.nics),
1019     'disk_template': instance.disk_template,
1020     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1021     'bep': bep,
1022     'hvp': hvp,
1023     'hypervisor_name': instance.hypervisor,
1024   }
1025   if override:
1026     args.update(override)
1027   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1028
1029
1030 def _AdjustCandidatePool(lu, exceptions):
1031   """Adjust the candidate pool after node operations.
1032
1033   """
1034   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1035   if mod_list:
1036     lu.LogInfo("Promoted nodes to master candidate role: %s",
1037                utils.CommaJoin(node.name for node in mod_list))
1038     for name in mod_list:
1039       lu.context.ReaddNode(name)
1040   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1041   if mc_now > mc_max:
1042     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1043                (mc_now, mc_max))
1044
1045
1046 def _DecideSelfPromotion(lu, exceptions=None):
1047   """Decide whether I should promote myself as a master candidate.
1048
1049   """
1050   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1051   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052   # the new node will increase mc_max with one, so:
1053   mc_should = min(mc_should + 1, cp_size)
1054   return mc_now < mc_should
1055
1056
1057 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1058   """Check that the brigdes needed by a list of nics exist.
1059
1060   """
1061   cluster = lu.cfg.GetClusterInfo()
1062   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1063   brlist = [params[constants.NIC_LINK] for params in paramslist
1064             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1065   if brlist:
1066     result = lu.rpc.call_bridges_exist(target_node, brlist)
1067     result.Raise("Error checking bridges on destination node '%s'" %
1068                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1069
1070
1071 def _CheckInstanceBridgesExist(lu, instance, node=None):
1072   """Check that the brigdes needed by an instance exist.
1073
1074   """
1075   if node is None:
1076     node = instance.primary_node
1077   _CheckNicsBridgesExist(lu, instance.nics, node)
1078
1079
1080 def _CheckOSVariant(os_obj, name):
1081   """Check whether an OS name conforms to the os variants specification.
1082
1083   @type os_obj: L{objects.OS}
1084   @param os_obj: OS object to check
1085   @type name: string
1086   @param name: OS name passed by the user, to check for validity
1087
1088   """
1089   if not os_obj.supported_variants:
1090     return
1091   try:
1092     variant = name.split("+", 1)[1]
1093   except IndexError:
1094     raise errors.OpPrereqError("OS name must include a variant",
1095                                errors.ECODE_INVAL)
1096
1097   if variant not in os_obj.supported_variants:
1098     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1099
1100
1101 def _GetNodeInstancesInner(cfg, fn):
1102   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1103
1104
1105 def _GetNodeInstances(cfg, node_name):
1106   """Returns a list of all primary and secondary instances on a node.
1107
1108   """
1109
1110   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1111
1112
1113 def _GetNodePrimaryInstances(cfg, node_name):
1114   """Returns primary instances on a node.
1115
1116   """
1117   return _GetNodeInstancesInner(cfg,
1118                                 lambda inst: node_name == inst.primary_node)
1119
1120
1121 def _GetNodeSecondaryInstances(cfg, node_name):
1122   """Returns secondary instances on a node.
1123
1124   """
1125   return _GetNodeInstancesInner(cfg,
1126                                 lambda inst: node_name in inst.secondary_nodes)
1127
1128
1129 def _GetStorageTypeArgs(cfg, storage_type):
1130   """Returns the arguments for a storage type.
1131
1132   """
1133   # Special case for file storage
1134   if storage_type == constants.ST_FILE:
1135     # storage.FileStorage wants a list of storage directories
1136     return [[cfg.GetFileStorageDir()]]
1137
1138   return []
1139
1140
1141 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1142   faulty = []
1143
1144   for dev in instance.disks:
1145     cfg.SetDiskID(dev, node_name)
1146
1147   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1148   result.Raise("Failed to get disk status from node %s" % node_name,
1149                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1150
1151   for idx, bdev_status in enumerate(result.payload):
1152     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1153       faulty.append(idx)
1154
1155   return faulty
1156
1157
1158 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1159   """Check the sanity of iallocator and node arguments and use the
1160   cluster-wide iallocator if appropriate.
1161
1162   Check that at most one of (iallocator, node) is specified. If none is
1163   specified, then the LU's opcode's iallocator slot is filled with the
1164   cluster-wide default iallocator.
1165
1166   @type iallocator_slot: string
1167   @param iallocator_slot: the name of the opcode iallocator slot
1168   @type node_slot: string
1169   @param node_slot: the name of the opcode target node slot
1170
1171   """
1172   node = getattr(lu.op, node_slot, None)
1173   iallocator = getattr(lu.op, iallocator_slot, None)
1174
1175   if node is not None and iallocator is not None:
1176     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1177                                errors.ECODE_INVAL)
1178   elif node is None and iallocator is None:
1179     default_iallocator = lu.cfg.GetDefaultIAllocator()
1180     if default_iallocator:
1181       setattr(lu.op, iallocator_slot, default_iallocator)
1182     else:
1183       raise errors.OpPrereqError("No iallocator or node given and no"
1184                                  " cluster-wide default iallocator found."
1185                                  " Please specify either an iallocator or a"
1186                                  " node, or set a cluster-wide default"
1187                                  " iallocator.")
1188
1189
1190 class LUPostInitCluster(LogicalUnit):
1191   """Logical unit for running hooks after cluster initialization.
1192
1193   """
1194   HPATH = "cluster-init"
1195   HTYPE = constants.HTYPE_CLUSTER
1196
1197   def BuildHooksEnv(self):
1198     """Build hooks env.
1199
1200     """
1201     env = {"OP_TARGET": self.cfg.GetClusterName()}
1202     mn = self.cfg.GetMasterNode()
1203     return env, [], [mn]
1204
1205   def Exec(self, feedback_fn):
1206     """Nothing to do.
1207
1208     """
1209     return True
1210
1211
1212 class LUDestroyCluster(LogicalUnit):
1213   """Logical unit for destroying the cluster.
1214
1215   """
1216   HPATH = "cluster-destroy"
1217   HTYPE = constants.HTYPE_CLUSTER
1218
1219   def BuildHooksEnv(self):
1220     """Build hooks env.
1221
1222     """
1223     env = {"OP_TARGET": self.cfg.GetClusterName()}
1224     return env, [], []
1225
1226   def CheckPrereq(self):
1227     """Check prerequisites.
1228
1229     This checks whether the cluster is empty.
1230
1231     Any errors are signaled by raising errors.OpPrereqError.
1232
1233     """
1234     master = self.cfg.GetMasterNode()
1235
1236     nodelist = self.cfg.GetNodeList()
1237     if len(nodelist) != 1 or nodelist[0] != master:
1238       raise errors.OpPrereqError("There are still %d node(s) in"
1239                                  " this cluster." % (len(nodelist) - 1),
1240                                  errors.ECODE_INVAL)
1241     instancelist = self.cfg.GetInstanceList()
1242     if instancelist:
1243       raise errors.OpPrereqError("There are still %d instance(s) in"
1244                                  " this cluster." % len(instancelist),
1245                                  errors.ECODE_INVAL)
1246
1247   def Exec(self, feedback_fn):
1248     """Destroys the cluster.
1249
1250     """
1251     master = self.cfg.GetMasterNode()
1252
1253     # Run post hooks on master node before it's removed
1254     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1255     try:
1256       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1257     except:
1258       # pylint: disable-msg=W0702
1259       self.LogWarning("Errors occurred running hooks on %s" % master)
1260
1261     result = self.rpc.call_node_stop_master(master, False)
1262     result.Raise("Could not disable the master role")
1263
1264     return master
1265
1266
1267 def _VerifyCertificate(filename):
1268   """Verifies a certificate for LUVerifyCluster.
1269
1270   @type filename: string
1271   @param filename: Path to PEM file
1272
1273   """
1274   try:
1275     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1276                                            utils.ReadFile(filename))
1277   except Exception, err: # pylint: disable-msg=W0703
1278     return (LUVerifyCluster.ETYPE_ERROR,
1279             "Failed to load X509 certificate %s: %s" % (filename, err))
1280
1281   (errcode, msg) = \
1282     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1283                                 constants.SSL_CERT_EXPIRATION_ERROR)
1284
1285   if msg:
1286     fnamemsg = "While verifying %s: %s" % (filename, msg)
1287   else:
1288     fnamemsg = None
1289
1290   if errcode is None:
1291     return (None, fnamemsg)
1292   elif errcode == utils.CERT_WARNING:
1293     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1294   elif errcode == utils.CERT_ERROR:
1295     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1296
1297   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1298
1299
1300 class LUVerifyCluster(LogicalUnit):
1301   """Verifies the cluster status.
1302
1303   """
1304   HPATH = "cluster-verify"
1305   HTYPE = constants.HTYPE_CLUSTER
1306   _OP_PARAMS = [
1307     ("skip_checks", _EmptyList,
1308      _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1309     ("verbose", False, _TBool),
1310     ("error_codes", False, _TBool),
1311     ("debug_simulate_errors", False, _TBool),
1312     ]
1313   REQ_BGL = False
1314
1315   TCLUSTER = "cluster"
1316   TNODE = "node"
1317   TINSTANCE = "instance"
1318
1319   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1320   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1321   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1322   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1323   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1324   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1325   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1326   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1327   ENODEDRBD = (TNODE, "ENODEDRBD")
1328   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1329   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1330   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1331   ENODEHV = (TNODE, "ENODEHV")
1332   ENODELVM = (TNODE, "ENODELVM")
1333   ENODEN1 = (TNODE, "ENODEN1")
1334   ENODENET = (TNODE, "ENODENET")
1335   ENODEOS = (TNODE, "ENODEOS")
1336   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1337   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1338   ENODERPC = (TNODE, "ENODERPC")
1339   ENODESSH = (TNODE, "ENODESSH")
1340   ENODEVERSION = (TNODE, "ENODEVERSION")
1341   ENODESETUP = (TNODE, "ENODESETUP")
1342   ENODETIME = (TNODE, "ENODETIME")
1343
1344   ETYPE_FIELD = "code"
1345   ETYPE_ERROR = "ERROR"
1346   ETYPE_WARNING = "WARNING"
1347
1348   class NodeImage(object):
1349     """A class representing the logical and physical status of a node.
1350
1351     @type name: string
1352     @ivar name: the node name to which this object refers
1353     @ivar volumes: a structure as returned from
1354         L{ganeti.backend.GetVolumeList} (runtime)
1355     @ivar instances: a list of running instances (runtime)
1356     @ivar pinst: list of configured primary instances (config)
1357     @ivar sinst: list of configured secondary instances (config)
1358     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1359         of this node (config)
1360     @ivar mfree: free memory, as reported by hypervisor (runtime)
1361     @ivar dfree: free disk, as reported by the node (runtime)
1362     @ivar offline: the offline status (config)
1363     @type rpc_fail: boolean
1364     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1365         not whether the individual keys were correct) (runtime)
1366     @type lvm_fail: boolean
1367     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1368     @type hyp_fail: boolean
1369     @ivar hyp_fail: whether the RPC call didn't return the instance list
1370     @type ghost: boolean
1371     @ivar ghost: whether this is a known node or not (config)
1372     @type os_fail: boolean
1373     @ivar os_fail: whether the RPC call didn't return valid OS data
1374     @type oslist: list
1375     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1376
1377     """
1378     def __init__(self, offline=False, name=None):
1379       self.name = name
1380       self.volumes = {}
1381       self.instances = []
1382       self.pinst = []
1383       self.sinst = []
1384       self.sbp = {}
1385       self.mfree = 0
1386       self.dfree = 0
1387       self.offline = offline
1388       self.rpc_fail = False
1389       self.lvm_fail = False
1390       self.hyp_fail = False
1391       self.ghost = False
1392       self.os_fail = False
1393       self.oslist = {}
1394
1395   def ExpandNames(self):
1396     self.needed_locks = {
1397       locking.LEVEL_NODE: locking.ALL_SET,
1398       locking.LEVEL_INSTANCE: locking.ALL_SET,
1399     }
1400     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1401
1402   def _Error(self, ecode, item, msg, *args, **kwargs):
1403     """Format an error message.
1404
1405     Based on the opcode's error_codes parameter, either format a
1406     parseable error code, or a simpler error string.
1407
1408     This must be called only from Exec and functions called from Exec.
1409
1410     """
1411     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1412     itype, etxt = ecode
1413     # first complete the msg
1414     if args:
1415       msg = msg % args
1416     # then format the whole message
1417     if self.op.error_codes:
1418       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1419     else:
1420       if item:
1421         item = " " + item
1422       else:
1423         item = ""
1424       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1425     # and finally report it via the feedback_fn
1426     self._feedback_fn("  - %s" % msg)
1427
1428   def _ErrorIf(self, cond, *args, **kwargs):
1429     """Log an error message if the passed condition is True.
1430
1431     """
1432     cond = bool(cond) or self.op.debug_simulate_errors
1433     if cond:
1434       self._Error(*args, **kwargs)
1435     # do not mark the operation as failed for WARN cases only
1436     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1437       self.bad = self.bad or cond
1438
1439   def _VerifyNode(self, ninfo, nresult):
1440     """Perform some basic validation on data returned from a node.
1441
1442       - check the result data structure is well formed and has all the
1443         mandatory fields
1444       - check ganeti version
1445
1446     @type ninfo: L{objects.Node}
1447     @param ninfo: the node to check
1448     @param nresult: the results from the node
1449     @rtype: boolean
1450     @return: whether overall this call was successful (and we can expect
1451          reasonable values in the respose)
1452
1453     """
1454     node = ninfo.name
1455     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1456
1457     # main result, nresult should be a non-empty dict
1458     test = not nresult or not isinstance(nresult, dict)
1459     _ErrorIf(test, self.ENODERPC, node,
1460                   "unable to verify node: no data returned")
1461     if test:
1462       return False
1463
1464     # compares ganeti version
1465     local_version = constants.PROTOCOL_VERSION
1466     remote_version = nresult.get("version", None)
1467     test = not (remote_version and
1468                 isinstance(remote_version, (list, tuple)) and
1469                 len(remote_version) == 2)
1470     _ErrorIf(test, self.ENODERPC, node,
1471              "connection to node returned invalid data")
1472     if test:
1473       return False
1474
1475     test = local_version != remote_version[0]
1476     _ErrorIf(test, self.ENODEVERSION, node,
1477              "incompatible protocol versions: master %s,"
1478              " node %s", local_version, remote_version[0])
1479     if test:
1480       return False
1481
1482     # node seems compatible, we can actually try to look into its results
1483
1484     # full package version
1485     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1486                   self.ENODEVERSION, node,
1487                   "software version mismatch: master %s, node %s",
1488                   constants.RELEASE_VERSION, remote_version[1],
1489                   code=self.ETYPE_WARNING)
1490
1491     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1492     if isinstance(hyp_result, dict):
1493       for hv_name, hv_result in hyp_result.iteritems():
1494         test = hv_result is not None
1495         _ErrorIf(test, self.ENODEHV, node,
1496                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1497
1498
1499     test = nresult.get(constants.NV_NODESETUP,
1500                            ["Missing NODESETUP results"])
1501     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1502              "; ".join(test))
1503
1504     return True
1505
1506   def _VerifyNodeTime(self, ninfo, nresult,
1507                       nvinfo_starttime, nvinfo_endtime):
1508     """Check the node time.
1509
1510     @type ninfo: L{objects.Node}
1511     @param ninfo: the node to check
1512     @param nresult: the remote results for the node
1513     @param nvinfo_starttime: the start time of the RPC call
1514     @param nvinfo_endtime: the end time of the RPC call
1515
1516     """
1517     node = ninfo.name
1518     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1519
1520     ntime = nresult.get(constants.NV_TIME, None)
1521     try:
1522       ntime_merged = utils.MergeTime(ntime)
1523     except (ValueError, TypeError):
1524       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1525       return
1526
1527     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1528       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1529     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1530       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1531     else:
1532       ntime_diff = None
1533
1534     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1535              "Node time diverges by at least %s from master node time",
1536              ntime_diff)
1537
1538   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1539     """Check the node time.
1540
1541     @type ninfo: L{objects.Node}
1542     @param ninfo: the node to check
1543     @param nresult: the remote results for the node
1544     @param vg_name: the configured VG name
1545
1546     """
1547     if vg_name is None:
1548       return
1549
1550     node = ninfo.name
1551     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1552
1553     # checks vg existence and size > 20G
1554     vglist = nresult.get(constants.NV_VGLIST, None)
1555     test = not vglist
1556     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1557     if not test:
1558       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1559                                             constants.MIN_VG_SIZE)
1560       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1561
1562     # check pv names
1563     pvlist = nresult.get(constants.NV_PVLIST, None)
1564     test = pvlist is None
1565     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1566     if not test:
1567       # check that ':' is not present in PV names, since it's a
1568       # special character for lvcreate (denotes the range of PEs to
1569       # use on the PV)
1570       for _, pvname, owner_vg in pvlist:
1571         test = ":" in pvname
1572         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1573                  " '%s' of VG '%s'", pvname, owner_vg)
1574
1575   def _VerifyNodeNetwork(self, ninfo, nresult):
1576     """Check the node time.
1577
1578     @type ninfo: L{objects.Node}
1579     @param ninfo: the node to check
1580     @param nresult: the remote results for the node
1581
1582     """
1583     node = ninfo.name
1584     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1585
1586     test = constants.NV_NODELIST not in nresult
1587     _ErrorIf(test, self.ENODESSH, node,
1588              "node hasn't returned node ssh connectivity data")
1589     if not test:
1590       if nresult[constants.NV_NODELIST]:
1591         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1592           _ErrorIf(True, self.ENODESSH, node,
1593                    "ssh communication with node '%s': %s", a_node, a_msg)
1594
1595     test = constants.NV_NODENETTEST not in nresult
1596     _ErrorIf(test, self.ENODENET, node,
1597              "node hasn't returned node tcp connectivity data")
1598     if not test:
1599       if nresult[constants.NV_NODENETTEST]:
1600         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1601         for anode in nlist:
1602           _ErrorIf(True, self.ENODENET, node,
1603                    "tcp communication with node '%s': %s",
1604                    anode, nresult[constants.NV_NODENETTEST][anode])
1605
1606     test = constants.NV_MASTERIP not in nresult
1607     _ErrorIf(test, self.ENODENET, node,
1608              "node hasn't returned node master IP reachability data")
1609     if not test:
1610       if not nresult[constants.NV_MASTERIP]:
1611         if node == self.master_node:
1612           msg = "the master node cannot reach the master IP (not configured?)"
1613         else:
1614           msg = "cannot reach the master IP"
1615         _ErrorIf(True, self.ENODENET, node, msg)
1616
1617
1618   def _VerifyInstance(self, instance, instanceconfig, node_image):
1619     """Verify an instance.
1620
1621     This function checks to see if the required block devices are
1622     available on the instance's node.
1623
1624     """
1625     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1626     node_current = instanceconfig.primary_node
1627
1628     node_vol_should = {}
1629     instanceconfig.MapLVsByNode(node_vol_should)
1630
1631     for node in node_vol_should:
1632       n_img = node_image[node]
1633       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1634         # ignore missing volumes on offline or broken nodes
1635         continue
1636       for volume in node_vol_should[node]:
1637         test = volume not in n_img.volumes
1638         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1639                  "volume %s missing on node %s", volume, node)
1640
1641     if instanceconfig.admin_up:
1642       pri_img = node_image[node_current]
1643       test = instance not in pri_img.instances and not pri_img.offline
1644       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1645                "instance not running on its primary node %s",
1646                node_current)
1647
1648     for node, n_img in node_image.items():
1649       if (not node == node_current):
1650         test = instance in n_img.instances
1651         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1652                  "instance should not run on node %s", node)
1653
1654   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1655     """Verify if there are any unknown volumes in the cluster.
1656
1657     The .os, .swap and backup volumes are ignored. All other volumes are
1658     reported as unknown.
1659
1660     @type reserved: L{ganeti.utils.FieldSet}
1661     @param reserved: a FieldSet of reserved volume names
1662
1663     """
1664     for node, n_img in node_image.items():
1665       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1666         # skip non-healthy nodes
1667         continue
1668       for volume in n_img.volumes:
1669         test = ((node not in node_vol_should or
1670                 volume not in node_vol_should[node]) and
1671                 not reserved.Matches(volume))
1672         self._ErrorIf(test, self.ENODEORPHANLV, node,
1673                       "volume %s is unknown", volume)
1674
1675   def _VerifyOrphanInstances(self, instancelist, node_image):
1676     """Verify the list of running instances.
1677
1678     This checks what instances are running but unknown to the cluster.
1679
1680     """
1681     for node, n_img in node_image.items():
1682       for o_inst in n_img.instances:
1683         test = o_inst not in instancelist
1684         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1685                       "instance %s on node %s should not exist", o_inst, node)
1686
1687   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1688     """Verify N+1 Memory Resilience.
1689
1690     Check that if one single node dies we can still start all the
1691     instances it was primary for.
1692
1693     """
1694     for node, n_img in node_image.items():
1695       # This code checks that every node which is now listed as
1696       # secondary has enough memory to host all instances it is
1697       # supposed to should a single other node in the cluster fail.
1698       # FIXME: not ready for failover to an arbitrary node
1699       # FIXME: does not support file-backed instances
1700       # WARNING: we currently take into account down instances as well
1701       # as up ones, considering that even if they're down someone
1702       # might want to start them even in the event of a node failure.
1703       for prinode, instances in n_img.sbp.items():
1704         needed_mem = 0
1705         for instance in instances:
1706           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1707           if bep[constants.BE_AUTO_BALANCE]:
1708             needed_mem += bep[constants.BE_MEMORY]
1709         test = n_img.mfree < needed_mem
1710         self._ErrorIf(test, self.ENODEN1, node,
1711                       "not enough memory on to accommodate"
1712                       " failovers should peer node %s fail", prinode)
1713
1714   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1715                        master_files):
1716     """Verifies and computes the node required file checksums.
1717
1718     @type ninfo: L{objects.Node}
1719     @param ninfo: the node to check
1720     @param nresult: the remote results for the node
1721     @param file_list: required list of files
1722     @param local_cksum: dictionary of local files and their checksums
1723     @param master_files: list of files that only masters should have
1724
1725     """
1726     node = ninfo.name
1727     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1728
1729     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1730     test = not isinstance(remote_cksum, dict)
1731     _ErrorIf(test, self.ENODEFILECHECK, node,
1732              "node hasn't returned file checksum data")
1733     if test:
1734       return
1735
1736     for file_name in file_list:
1737       node_is_mc = ninfo.master_candidate
1738       must_have = (file_name not in master_files) or node_is_mc
1739       # missing
1740       test1 = file_name not in remote_cksum
1741       # invalid checksum
1742       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1743       # existing and good
1744       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1745       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1746                "file '%s' missing", file_name)
1747       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1748                "file '%s' has wrong checksum", file_name)
1749       # not candidate and this is not a must-have file
1750       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1751                "file '%s' should not exist on non master"
1752                " candidates (and the file is outdated)", file_name)
1753       # all good, except non-master/non-must have combination
1754       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1755                "file '%s' should not exist"
1756                " on non master candidates", file_name)
1757
1758   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1759                       drbd_map):
1760     """Verifies and the node DRBD status.
1761
1762     @type ninfo: L{objects.Node}
1763     @param ninfo: the node to check
1764     @param nresult: the remote results for the node
1765     @param instanceinfo: the dict of instances
1766     @param drbd_helper: the configured DRBD usermode helper
1767     @param drbd_map: the DRBD map as returned by
1768         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1769
1770     """
1771     node = ninfo.name
1772     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1773
1774     if drbd_helper:
1775       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1776       test = (helper_result == None)
1777       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1778                "no drbd usermode helper returned")
1779       if helper_result:
1780         status, payload = helper_result
1781         test = not status
1782         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1783                  "drbd usermode helper check unsuccessful: %s", payload)
1784         test = status and (payload != drbd_helper)
1785         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1786                  "wrong drbd usermode helper: %s", payload)
1787
1788     # compute the DRBD minors
1789     node_drbd = {}
1790     for minor, instance in drbd_map[node].items():
1791       test = instance not in instanceinfo
1792       _ErrorIf(test, self.ECLUSTERCFG, None,
1793                "ghost instance '%s' in temporary DRBD map", instance)
1794         # ghost instance should not be running, but otherwise we
1795         # don't give double warnings (both ghost instance and
1796         # unallocated minor in use)
1797       if test:
1798         node_drbd[minor] = (instance, False)
1799       else:
1800         instance = instanceinfo[instance]
1801         node_drbd[minor] = (instance.name, instance.admin_up)
1802
1803     # and now check them
1804     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1805     test = not isinstance(used_minors, (tuple, list))
1806     _ErrorIf(test, self.ENODEDRBD, node,
1807              "cannot parse drbd status file: %s", str(used_minors))
1808     if test:
1809       # we cannot check drbd status
1810       return
1811
1812     for minor, (iname, must_exist) in node_drbd.items():
1813       test = minor not in used_minors and must_exist
1814       _ErrorIf(test, self.ENODEDRBD, node,
1815                "drbd minor %d of instance %s is not active", minor, iname)
1816     for minor in used_minors:
1817       test = minor not in node_drbd
1818       _ErrorIf(test, self.ENODEDRBD, node,
1819                "unallocated drbd minor %d is in use", minor)
1820
1821   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1822     """Builds the node OS structures.
1823
1824     @type ninfo: L{objects.Node}
1825     @param ninfo: the node to check
1826     @param nresult: the remote results for the node
1827     @param nimg: the node image object
1828
1829     """
1830     node = ninfo.name
1831     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1832
1833     remote_os = nresult.get(constants.NV_OSLIST, None)
1834     test = (not isinstance(remote_os, list) or
1835             not compat.all(isinstance(v, list) and len(v) == 7
1836                            for v in remote_os))
1837
1838     _ErrorIf(test, self.ENODEOS, node,
1839              "node hasn't returned valid OS data")
1840
1841     nimg.os_fail = test
1842
1843     if test:
1844       return
1845
1846     os_dict = {}
1847
1848     for (name, os_path, status, diagnose,
1849          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1850
1851       if name not in os_dict:
1852         os_dict[name] = []
1853
1854       # parameters is a list of lists instead of list of tuples due to
1855       # JSON lacking a real tuple type, fix it:
1856       parameters = [tuple(v) for v in parameters]
1857       os_dict[name].append((os_path, status, diagnose,
1858                             set(variants), set(parameters), set(api_ver)))
1859
1860     nimg.oslist = os_dict
1861
1862   def _VerifyNodeOS(self, ninfo, nimg, base):
1863     """Verifies the node OS list.
1864
1865     @type ninfo: L{objects.Node}
1866     @param ninfo: the node to check
1867     @param nimg: the node image object
1868     @param base: the 'template' node we match against (e.g. from the master)
1869
1870     """
1871     node = ninfo.name
1872     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1873
1874     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1875
1876     for os_name, os_data in nimg.oslist.items():
1877       assert os_data, "Empty OS status for OS %s?!" % os_name
1878       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1879       _ErrorIf(not f_status, self.ENODEOS, node,
1880                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1881       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1882                "OS '%s' has multiple entries (first one shadows the rest): %s",
1883                os_name, utils.CommaJoin([v[0] for v in os_data]))
1884       # this will catched in backend too
1885       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1886                and not f_var, self.ENODEOS, node,
1887                "OS %s with API at least %d does not declare any variant",
1888                os_name, constants.OS_API_V15)
1889       # comparisons with the 'base' image
1890       test = os_name not in base.oslist
1891       _ErrorIf(test, self.ENODEOS, node,
1892                "Extra OS %s not present on reference node (%s)",
1893                os_name, base.name)
1894       if test:
1895         continue
1896       assert base.oslist[os_name], "Base node has empty OS status?"
1897       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1898       if not b_status:
1899         # base OS is invalid, skipping
1900         continue
1901       for kind, a, b in [("API version", f_api, b_api),
1902                          ("variants list", f_var, b_var),
1903                          ("parameters", f_param, b_param)]:
1904         _ErrorIf(a != b, self.ENODEOS, node,
1905                  "OS %s %s differs from reference node %s: %s vs. %s",
1906                  kind, os_name, base.name,
1907                  utils.CommaJoin(a), utils.CommaJoin(b))
1908
1909     # check any missing OSes
1910     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1911     _ErrorIf(missing, self.ENODEOS, node,
1912              "OSes present on reference node %s but missing on this node: %s",
1913              base.name, utils.CommaJoin(missing))
1914
1915   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1916     """Verifies and updates the node volume data.
1917
1918     This function will update a L{NodeImage}'s internal structures
1919     with data from the remote call.
1920
1921     @type ninfo: L{objects.Node}
1922     @param ninfo: the node to check
1923     @param nresult: the remote results for the node
1924     @param nimg: the node image object
1925     @param vg_name: the configured VG name
1926
1927     """
1928     node = ninfo.name
1929     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1930
1931     nimg.lvm_fail = True
1932     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1933     if vg_name is None:
1934       pass
1935     elif isinstance(lvdata, basestring):
1936       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1937                utils.SafeEncode(lvdata))
1938     elif not isinstance(lvdata, dict):
1939       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1940     else:
1941       nimg.volumes = lvdata
1942       nimg.lvm_fail = False
1943
1944   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1945     """Verifies and updates the node instance list.
1946
1947     If the listing was successful, then updates this node's instance
1948     list. Otherwise, it marks the RPC call as failed for the instance
1949     list key.
1950
1951     @type ninfo: L{objects.Node}
1952     @param ninfo: the node to check
1953     @param nresult: the remote results for the node
1954     @param nimg: the node image object
1955
1956     """
1957     idata = nresult.get(constants.NV_INSTANCELIST, None)
1958     test = not isinstance(idata, list)
1959     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1960                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1961     if test:
1962       nimg.hyp_fail = True
1963     else:
1964       nimg.instances = idata
1965
1966   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1967     """Verifies and computes a node information map
1968
1969     @type ninfo: L{objects.Node}
1970     @param ninfo: the node to check
1971     @param nresult: the remote results for the node
1972     @param nimg: the node image object
1973     @param vg_name: the configured VG name
1974
1975     """
1976     node = ninfo.name
1977     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1978
1979     # try to read free memory (from the hypervisor)
1980     hv_info = nresult.get(constants.NV_HVINFO, None)
1981     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1982     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1983     if not test:
1984       try:
1985         nimg.mfree = int(hv_info["memory_free"])
1986       except (ValueError, TypeError):
1987         _ErrorIf(True, self.ENODERPC, node,
1988                  "node returned invalid nodeinfo, check hypervisor")
1989
1990     # FIXME: devise a free space model for file based instances as well
1991     if vg_name is not None:
1992       test = (constants.NV_VGLIST not in nresult or
1993               vg_name not in nresult[constants.NV_VGLIST])
1994       _ErrorIf(test, self.ENODELVM, node,
1995                "node didn't return data for the volume group '%s'"
1996                " - it is either missing or broken", vg_name)
1997       if not test:
1998         try:
1999           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2000         except (ValueError, TypeError):
2001           _ErrorIf(True, self.ENODERPC, node,
2002                    "node returned invalid LVM info, check LVM status")
2003
2004   def BuildHooksEnv(self):
2005     """Build hooks env.
2006
2007     Cluster-Verify hooks just ran in the post phase and their failure makes
2008     the output be logged in the verify output and the verification to fail.
2009
2010     """
2011     all_nodes = self.cfg.GetNodeList()
2012     env = {
2013       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2014       }
2015     for node in self.cfg.GetAllNodesInfo().values():
2016       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2017
2018     return env, [], all_nodes
2019
2020   def Exec(self, feedback_fn):
2021     """Verify integrity of cluster, performing various test on nodes.
2022
2023     """
2024     self.bad = False
2025     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2026     verbose = self.op.verbose
2027     self._feedback_fn = feedback_fn
2028     feedback_fn("* Verifying global settings")
2029     for msg in self.cfg.VerifyConfig():
2030       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2031
2032     # Check the cluster certificates
2033     for cert_filename in constants.ALL_CERT_FILES:
2034       (errcode, msg) = _VerifyCertificate(cert_filename)
2035       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2036
2037     vg_name = self.cfg.GetVGName()
2038     drbd_helper = self.cfg.GetDRBDHelper()
2039     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2040     cluster = self.cfg.GetClusterInfo()
2041     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2042     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2043     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2044     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2045                         for iname in instancelist)
2046     i_non_redundant = [] # Non redundant instances
2047     i_non_a_balanced = [] # Non auto-balanced instances
2048     n_offline = 0 # Count of offline nodes
2049     n_drained = 0 # Count of nodes being drained
2050     node_vol_should = {}
2051
2052     # FIXME: verify OS list
2053     # do local checksums
2054     master_files = [constants.CLUSTER_CONF_FILE]
2055     master_node = self.master_node = self.cfg.GetMasterNode()
2056     master_ip = self.cfg.GetMasterIP()
2057
2058     file_names = ssconf.SimpleStore().GetFileList()
2059     file_names.extend(constants.ALL_CERT_FILES)
2060     file_names.extend(master_files)
2061     if cluster.modify_etc_hosts:
2062       file_names.append(constants.ETC_HOSTS)
2063
2064     local_checksums = utils.FingerprintFiles(file_names)
2065
2066     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2067     node_verify_param = {
2068       constants.NV_FILELIST: file_names,
2069       constants.NV_NODELIST: [node.name for node in nodeinfo
2070                               if not node.offline],
2071       constants.NV_HYPERVISOR: hypervisors,
2072       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2073                                   node.secondary_ip) for node in nodeinfo
2074                                  if not node.offline],
2075       constants.NV_INSTANCELIST: hypervisors,
2076       constants.NV_VERSION: None,
2077       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2078       constants.NV_NODESETUP: None,
2079       constants.NV_TIME: None,
2080       constants.NV_MASTERIP: (master_node, master_ip),
2081       constants.NV_OSLIST: None,
2082       }
2083
2084     if vg_name is not None:
2085       node_verify_param[constants.NV_VGLIST] = None
2086       node_verify_param[constants.NV_LVLIST] = vg_name
2087       node_verify_param[constants.NV_PVLIST] = [vg_name]
2088       node_verify_param[constants.NV_DRBDLIST] = None
2089
2090     if drbd_helper:
2091       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2092
2093     # Build our expected cluster state
2094     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2095                                                  name=node.name))
2096                       for node in nodeinfo)
2097
2098     for instance in instancelist:
2099       inst_config = instanceinfo[instance]
2100
2101       for nname in inst_config.all_nodes:
2102         if nname not in node_image:
2103           # ghost node
2104           gnode = self.NodeImage(name=nname)
2105           gnode.ghost = True
2106           node_image[nname] = gnode
2107
2108       inst_config.MapLVsByNode(node_vol_should)
2109
2110       pnode = inst_config.primary_node
2111       node_image[pnode].pinst.append(instance)
2112
2113       for snode in inst_config.secondary_nodes:
2114         nimg = node_image[snode]
2115         nimg.sinst.append(instance)
2116         if pnode not in nimg.sbp:
2117           nimg.sbp[pnode] = []
2118         nimg.sbp[pnode].append(instance)
2119
2120     # At this point, we have the in-memory data structures complete,
2121     # except for the runtime information, which we'll gather next
2122
2123     # Due to the way our RPC system works, exact response times cannot be
2124     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2125     # time before and after executing the request, we can at least have a time
2126     # window.
2127     nvinfo_starttime = time.time()
2128     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2129                                            self.cfg.GetClusterName())
2130     nvinfo_endtime = time.time()
2131
2132     all_drbd_map = self.cfg.ComputeDRBDMap()
2133
2134     feedback_fn("* Verifying node status")
2135
2136     refos_img = None
2137
2138     for node_i in nodeinfo:
2139       node = node_i.name
2140       nimg = node_image[node]
2141
2142       if node_i.offline:
2143         if verbose:
2144           feedback_fn("* Skipping offline node %s" % (node,))
2145         n_offline += 1
2146         continue
2147
2148       if node == master_node:
2149         ntype = "master"
2150       elif node_i.master_candidate:
2151         ntype = "master candidate"
2152       elif node_i.drained:
2153         ntype = "drained"
2154         n_drained += 1
2155       else:
2156         ntype = "regular"
2157       if verbose:
2158         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2159
2160       msg = all_nvinfo[node].fail_msg
2161       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2162       if msg:
2163         nimg.rpc_fail = True
2164         continue
2165
2166       nresult = all_nvinfo[node].payload
2167
2168       nimg.call_ok = self._VerifyNode(node_i, nresult)
2169       self._VerifyNodeNetwork(node_i, nresult)
2170       self._VerifyNodeLVM(node_i, nresult, vg_name)
2171       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2172                             master_files)
2173       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2174                            all_drbd_map)
2175       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2176
2177       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2178       self._UpdateNodeInstances(node_i, nresult, nimg)
2179       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2180       self._UpdateNodeOS(node_i, nresult, nimg)
2181       if not nimg.os_fail:
2182         if refos_img is None:
2183           refos_img = nimg
2184         self._VerifyNodeOS(node_i, nimg, refos_img)
2185
2186     feedback_fn("* Verifying instance status")
2187     for instance in instancelist:
2188       if verbose:
2189         feedback_fn("* Verifying instance %s" % instance)
2190       inst_config = instanceinfo[instance]
2191       self._VerifyInstance(instance, inst_config, node_image)
2192       inst_nodes_offline = []
2193
2194       pnode = inst_config.primary_node
2195       pnode_img = node_image[pnode]
2196       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2197                self.ENODERPC, pnode, "instance %s, connection to"
2198                " primary node failed", instance)
2199
2200       if pnode_img.offline:
2201         inst_nodes_offline.append(pnode)
2202
2203       # If the instance is non-redundant we cannot survive losing its primary
2204       # node, so we are not N+1 compliant. On the other hand we have no disk
2205       # templates with more than one secondary so that situation is not well
2206       # supported either.
2207       # FIXME: does not support file-backed instances
2208       if not inst_config.secondary_nodes:
2209         i_non_redundant.append(instance)
2210       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2211                instance, "instance has multiple secondary nodes: %s",
2212                utils.CommaJoin(inst_config.secondary_nodes),
2213                code=self.ETYPE_WARNING)
2214
2215       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2216         i_non_a_balanced.append(instance)
2217
2218       for snode in inst_config.secondary_nodes:
2219         s_img = node_image[snode]
2220         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2221                  "instance %s, connection to secondary node failed", instance)
2222
2223         if s_img.offline:
2224           inst_nodes_offline.append(snode)
2225
2226       # warn that the instance lives on offline nodes
2227       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2228                "instance lives on offline node(s) %s",
2229                utils.CommaJoin(inst_nodes_offline))
2230       # ... or ghost nodes
2231       for node in inst_config.all_nodes:
2232         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2233                  "instance lives on ghost node %s", node)
2234
2235     feedback_fn("* Verifying orphan volumes")
2236     reserved = utils.FieldSet(*cluster.reserved_lvs)
2237     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2238
2239     feedback_fn("* Verifying orphan instances")
2240     self._VerifyOrphanInstances(instancelist, node_image)
2241
2242     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2243       feedback_fn("* Verifying N+1 Memory redundancy")
2244       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2245
2246     feedback_fn("* Other Notes")
2247     if i_non_redundant:
2248       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2249                   % len(i_non_redundant))
2250
2251     if i_non_a_balanced:
2252       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2253                   % len(i_non_a_balanced))
2254
2255     if n_offline:
2256       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2257
2258     if n_drained:
2259       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2260
2261     return not self.bad
2262
2263   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2264     """Analyze the post-hooks' result
2265
2266     This method analyses the hook result, handles it, and sends some
2267     nicely-formatted feedback back to the user.
2268
2269     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2270         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2271     @param hooks_results: the results of the multi-node hooks rpc call
2272     @param feedback_fn: function used send feedback back to the caller
2273     @param lu_result: previous Exec result
2274     @return: the new Exec result, based on the previous result
2275         and hook results
2276
2277     """
2278     # We only really run POST phase hooks, and are only interested in
2279     # their results
2280     if phase == constants.HOOKS_PHASE_POST:
2281       # Used to change hooks' output to proper indentation
2282       indent_re = re.compile('^', re.M)
2283       feedback_fn("* Hooks Results")
2284       assert hooks_results, "invalid result from hooks"
2285
2286       for node_name in hooks_results:
2287         res = hooks_results[node_name]
2288         msg = res.fail_msg
2289         test = msg and not res.offline
2290         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2291                       "Communication failure in hooks execution: %s", msg)
2292         if res.offline or msg:
2293           # No need to investigate payload if node is offline or gave an error.
2294           # override manually lu_result here as _ErrorIf only
2295           # overrides self.bad
2296           lu_result = 1
2297           continue
2298         for script, hkr, output in res.payload:
2299           test = hkr == constants.HKR_FAIL
2300           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2301                         "Script %s failed, output:", script)
2302           if test:
2303             output = indent_re.sub('      ', output)
2304             feedback_fn("%s" % output)
2305             lu_result = 0
2306
2307       return lu_result
2308
2309
2310 class LUVerifyDisks(NoHooksLU):
2311   """Verifies the cluster disks status.
2312
2313   """
2314   REQ_BGL = False
2315
2316   def ExpandNames(self):
2317     self.needed_locks = {
2318       locking.LEVEL_NODE: locking.ALL_SET,
2319       locking.LEVEL_INSTANCE: locking.ALL_SET,
2320     }
2321     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2322
2323   def Exec(self, feedback_fn):
2324     """Verify integrity of cluster disks.
2325
2326     @rtype: tuple of three items
2327     @return: a tuple of (dict of node-to-node_error, list of instances
2328         which need activate-disks, dict of instance: (node, volume) for
2329         missing volumes
2330
2331     """
2332     result = res_nodes, res_instances, res_missing = {}, [], {}
2333
2334     vg_name = self.cfg.GetVGName()
2335     nodes = utils.NiceSort(self.cfg.GetNodeList())
2336     instances = [self.cfg.GetInstanceInfo(name)
2337                  for name in self.cfg.GetInstanceList()]
2338
2339     nv_dict = {}
2340     for inst in instances:
2341       inst_lvs = {}
2342       if (not inst.admin_up or
2343           inst.disk_template not in constants.DTS_NET_MIRROR):
2344         continue
2345       inst.MapLVsByNode(inst_lvs)
2346       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2347       for node, vol_list in inst_lvs.iteritems():
2348         for vol in vol_list:
2349           nv_dict[(node, vol)] = inst
2350
2351     if not nv_dict:
2352       return result
2353
2354     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2355
2356     for node in nodes:
2357       # node_volume
2358       node_res = node_lvs[node]
2359       if node_res.offline:
2360         continue
2361       msg = node_res.fail_msg
2362       if msg:
2363         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2364         res_nodes[node] = msg
2365         continue
2366
2367       lvs = node_res.payload
2368       for lv_name, (_, _, lv_online) in lvs.items():
2369         inst = nv_dict.pop((node, lv_name), None)
2370         if (not lv_online and inst is not None
2371             and inst.name not in res_instances):
2372           res_instances.append(inst.name)
2373
2374     # any leftover items in nv_dict are missing LVs, let's arrange the
2375     # data better
2376     for key, inst in nv_dict.iteritems():
2377       if inst.name not in res_missing:
2378         res_missing[inst.name] = []
2379       res_missing[inst.name].append(key)
2380
2381     return result
2382
2383
2384 class LURepairDiskSizes(NoHooksLU):
2385   """Verifies the cluster disks sizes.
2386
2387   """
2388   _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2389   REQ_BGL = False
2390
2391   def ExpandNames(self):
2392     if self.op.instances:
2393       self.wanted_names = []
2394       for name in self.op.instances:
2395         full_name = _ExpandInstanceName(self.cfg, name)
2396         self.wanted_names.append(full_name)
2397       self.needed_locks = {
2398         locking.LEVEL_NODE: [],
2399         locking.LEVEL_INSTANCE: self.wanted_names,
2400         }
2401       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2402     else:
2403       self.wanted_names = None
2404       self.needed_locks = {
2405         locking.LEVEL_NODE: locking.ALL_SET,
2406         locking.LEVEL_INSTANCE: locking.ALL_SET,
2407         }
2408     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2409
2410   def DeclareLocks(self, level):
2411     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2412       self._LockInstancesNodes(primary_only=True)
2413
2414   def CheckPrereq(self):
2415     """Check prerequisites.
2416
2417     This only checks the optional instance list against the existing names.
2418
2419     """
2420     if self.wanted_names is None:
2421       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2422
2423     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2424                              in self.wanted_names]
2425
2426   def _EnsureChildSizes(self, disk):
2427     """Ensure children of the disk have the needed disk size.
2428
2429     This is valid mainly for DRBD8 and fixes an issue where the
2430     children have smaller disk size.
2431
2432     @param disk: an L{ganeti.objects.Disk} object
2433
2434     """
2435     if disk.dev_type == constants.LD_DRBD8:
2436       assert disk.children, "Empty children for DRBD8?"
2437       fchild = disk.children[0]
2438       mismatch = fchild.size < disk.size
2439       if mismatch:
2440         self.LogInfo("Child disk has size %d, parent %d, fixing",
2441                      fchild.size, disk.size)
2442         fchild.size = disk.size
2443
2444       # and we recurse on this child only, not on the metadev
2445       return self._EnsureChildSizes(fchild) or mismatch
2446     else:
2447       return False
2448
2449   def Exec(self, feedback_fn):
2450     """Verify the size of cluster disks.
2451
2452     """
2453     # TODO: check child disks too
2454     # TODO: check differences in size between primary/secondary nodes
2455     per_node_disks = {}
2456     for instance in self.wanted_instances:
2457       pnode = instance.primary_node
2458       if pnode not in per_node_disks:
2459         per_node_disks[pnode] = []
2460       for idx, disk in enumerate(instance.disks):
2461         per_node_disks[pnode].append((instance, idx, disk))
2462
2463     changed = []
2464     for node, dskl in per_node_disks.items():
2465       newl = [v[2].Copy() for v in dskl]
2466       for dsk in newl:
2467         self.cfg.SetDiskID(dsk, node)
2468       result = self.rpc.call_blockdev_getsizes(node, newl)
2469       if result.fail_msg:
2470         self.LogWarning("Failure in blockdev_getsizes call to node"
2471                         " %s, ignoring", node)
2472         continue
2473       if len(result.data) != len(dskl):
2474         self.LogWarning("Invalid result from node %s, ignoring node results",
2475                         node)
2476         continue
2477       for ((instance, idx, disk), size) in zip(dskl, result.data):
2478         if size is None:
2479           self.LogWarning("Disk %d of instance %s did not return size"
2480                           " information, ignoring", idx, instance.name)
2481           continue
2482         if not isinstance(size, (int, long)):
2483           self.LogWarning("Disk %d of instance %s did not return valid"
2484                           " size information, ignoring", idx, instance.name)
2485           continue
2486         size = size >> 20
2487         if size != disk.size:
2488           self.LogInfo("Disk %d of instance %s has mismatched size,"
2489                        " correcting: recorded %d, actual %d", idx,
2490                        instance.name, disk.size, size)
2491           disk.size = size
2492           self.cfg.Update(instance, feedback_fn)
2493           changed.append((instance.name, idx, size))
2494         if self._EnsureChildSizes(disk):
2495           self.cfg.Update(instance, feedback_fn)
2496           changed.append((instance.name, idx, disk.size))
2497     return changed
2498
2499
2500 class LURenameCluster(LogicalUnit):
2501   """Rename the cluster.
2502
2503   """
2504   HPATH = "cluster-rename"
2505   HTYPE = constants.HTYPE_CLUSTER
2506   _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2507
2508   def BuildHooksEnv(self):
2509     """Build hooks env.
2510
2511     """
2512     env = {
2513       "OP_TARGET": self.cfg.GetClusterName(),
2514       "NEW_NAME": self.op.name,
2515       }
2516     mn = self.cfg.GetMasterNode()
2517     all_nodes = self.cfg.GetNodeList()
2518     return env, [mn], all_nodes
2519
2520   def CheckPrereq(self):
2521     """Verify that the passed name is a valid one.
2522
2523     """
2524     hostname = netutils.GetHostname(name=self.op.name,
2525                                     family=self.cfg.GetPrimaryIPFamily())
2526
2527     new_name = hostname.name
2528     self.ip = new_ip = hostname.ip
2529     old_name = self.cfg.GetClusterName()
2530     old_ip = self.cfg.GetMasterIP()
2531     if new_name == old_name and new_ip == old_ip:
2532       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2533                                  " cluster has changed",
2534                                  errors.ECODE_INVAL)
2535     if new_ip != old_ip:
2536       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2537         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2538                                    " reachable on the network. Aborting." %
2539                                    new_ip, errors.ECODE_NOTUNIQUE)
2540
2541     self.op.name = new_name
2542
2543   def Exec(self, feedback_fn):
2544     """Rename the cluster.
2545
2546     """
2547     clustername = self.op.name
2548     ip = self.ip
2549
2550     # shutdown the master IP
2551     master = self.cfg.GetMasterNode()
2552     result = self.rpc.call_node_stop_master(master, False)
2553     result.Raise("Could not disable the master role")
2554
2555     try:
2556       cluster = self.cfg.GetClusterInfo()
2557       cluster.cluster_name = clustername
2558       cluster.master_ip = ip
2559       self.cfg.Update(cluster, feedback_fn)
2560
2561       # update the known hosts file
2562       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2563       node_list = self.cfg.GetNodeList()
2564       try:
2565         node_list.remove(master)
2566       except ValueError:
2567         pass
2568       result = self.rpc.call_upload_file(node_list,
2569                                          constants.SSH_KNOWN_HOSTS_FILE)
2570       for to_node, to_result in result.iteritems():
2571         msg = to_result.fail_msg
2572         if msg:
2573           msg = ("Copy of file %s to node %s failed: %s" %
2574                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2575           self.proc.LogWarning(msg)
2576
2577     finally:
2578       result = self.rpc.call_node_start_master(master, False, False)
2579       msg = result.fail_msg
2580       if msg:
2581         self.LogWarning("Could not re-enable the master role on"
2582                         " the master, please restart manually: %s", msg)
2583
2584     return clustername
2585
2586
2587 class LUSetClusterParams(LogicalUnit):
2588   """Change the parameters of the cluster.
2589
2590   """
2591   HPATH = "cluster-modify"
2592   HTYPE = constants.HTYPE_CLUSTER
2593   _OP_PARAMS = [
2594     ("vg_name", None, _TMaybeString),
2595     ("enabled_hypervisors", None,
2596      _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2597     ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2598     ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2599     ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2600     ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2601     ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2602     ("uid_pool", None, _NoType),
2603     ("add_uids", None, _NoType),
2604     ("remove_uids", None, _NoType),
2605     ("maintain_node_health", None, _TMaybeBool),
2606     ("nicparams", None, _TOr(_TDict, _TNone)),
2607     ("drbd_helper", None, _TOr(_TString, _TNone)),
2608     ("default_iallocator", None, _TMaybeString),
2609     ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2610     ]
2611   REQ_BGL = False
2612
2613   def CheckArguments(self):
2614     """Check parameters
2615
2616     """
2617     if self.op.uid_pool:
2618       uidpool.CheckUidPool(self.op.uid_pool)
2619
2620     if self.op.add_uids:
2621       uidpool.CheckUidPool(self.op.add_uids)
2622
2623     if self.op.remove_uids:
2624       uidpool.CheckUidPool(self.op.remove_uids)
2625
2626   def ExpandNames(self):
2627     # FIXME: in the future maybe other cluster params won't require checking on
2628     # all nodes to be modified.
2629     self.needed_locks = {
2630       locking.LEVEL_NODE: locking.ALL_SET,
2631     }
2632     self.share_locks[locking.LEVEL_NODE] = 1
2633
2634   def BuildHooksEnv(self):
2635     """Build hooks env.
2636
2637     """
2638     env = {
2639       "OP_TARGET": self.cfg.GetClusterName(),
2640       "NEW_VG_NAME": self.op.vg_name,
2641       }
2642     mn = self.cfg.GetMasterNode()
2643     return env, [mn], [mn]
2644
2645   def CheckPrereq(self):
2646     """Check prerequisites.
2647
2648     This checks whether the given params don't conflict and
2649     if the given volume group is valid.
2650
2651     """
2652     if self.op.vg_name is not None and not self.op.vg_name:
2653       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2654         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2655                                    " instances exist", errors.ECODE_INVAL)
2656
2657     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2658       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2659         raise errors.OpPrereqError("Cannot disable drbd helper while"
2660                                    " drbd-based instances exist",
2661                                    errors.ECODE_INVAL)
2662
2663     node_list = self.acquired_locks[locking.LEVEL_NODE]
2664
2665     # if vg_name not None, checks given volume group on all nodes
2666     if self.op.vg_name:
2667       vglist = self.rpc.call_vg_list(node_list)
2668       for node in node_list:
2669         msg = vglist[node].fail_msg
2670         if msg:
2671           # ignoring down node
2672           self.LogWarning("Error while gathering data on node %s"
2673                           " (ignoring node): %s", node, msg)
2674           continue
2675         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2676                                               self.op.vg_name,
2677                                               constants.MIN_VG_SIZE)
2678         if vgstatus:
2679           raise errors.OpPrereqError("Error on node '%s': %s" %
2680                                      (node, vgstatus), errors.ECODE_ENVIRON)
2681
2682     if self.op.drbd_helper:
2683       # checks given drbd helper on all nodes
2684       helpers = self.rpc.call_drbd_helper(node_list)
2685       for node in node_list:
2686         ninfo = self.cfg.GetNodeInfo(node)
2687         if ninfo.offline:
2688           self.LogInfo("Not checking drbd helper on offline node %s", node)
2689           continue
2690         msg = helpers[node].fail_msg
2691         if msg:
2692           raise errors.OpPrereqError("Error checking drbd helper on node"
2693                                      " '%s': %s" % (node, msg),
2694                                      errors.ECODE_ENVIRON)
2695         node_helper = helpers[node].payload
2696         if node_helper != self.op.drbd_helper:
2697           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2698                                      (node, node_helper), errors.ECODE_ENVIRON)
2699
2700     self.cluster = cluster = self.cfg.GetClusterInfo()
2701     # validate params changes
2702     if self.op.beparams:
2703       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2704       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2705
2706     if self.op.nicparams:
2707       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2708       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2709       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2710       nic_errors = []
2711
2712       # check all instances for consistency
2713       for instance in self.cfg.GetAllInstancesInfo().values():
2714         for nic_idx, nic in enumerate(instance.nics):
2715           params_copy = copy.deepcopy(nic.nicparams)
2716           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2717
2718           # check parameter syntax
2719           try:
2720             objects.NIC.CheckParameterSyntax(params_filled)
2721           except errors.ConfigurationError, err:
2722             nic_errors.append("Instance %s, nic/%d: %s" %
2723                               (instance.name, nic_idx, err))
2724
2725           # if we're moving instances to routed, check that they have an ip
2726           target_mode = params_filled[constants.NIC_MODE]
2727           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2728             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2729                               (instance.name, nic_idx))
2730       if nic_errors:
2731         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2732                                    "\n".join(nic_errors))
2733
2734     # hypervisor list/parameters
2735     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2736     if self.op.hvparams:
2737       for hv_name, hv_dict in self.op.hvparams.items():
2738         if hv_name not in self.new_hvparams:
2739           self.new_hvparams[hv_name] = hv_dict
2740         else:
2741           self.new_hvparams[hv_name].update(hv_dict)
2742
2743     # os hypervisor parameters
2744     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2745     if self.op.os_hvp:
2746       for os_name, hvs in self.op.os_hvp.items():
2747         if os_name not in self.new_os_hvp:
2748           self.new_os_hvp[os_name] = hvs
2749         else:
2750           for hv_name, hv_dict in hvs.items():
2751             if hv_name not in self.new_os_hvp[os_name]:
2752               self.new_os_hvp[os_name][hv_name] = hv_dict
2753             else:
2754               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2755
2756     # os parameters
2757     self.new_osp = objects.FillDict(cluster.osparams, {})
2758     if self.op.osparams:
2759       for os_name, osp in self.op.osparams.items():
2760         if os_name not in self.new_osp:
2761           self.new_osp[os_name] = {}
2762
2763         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2764                                                   use_none=True)
2765
2766         if not self.new_osp[os_name]:
2767           # we removed all parameters
2768           del self.new_osp[os_name]
2769         else:
2770           # check the parameter validity (remote check)
2771           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2772                          os_name, self.new_osp[os_name])
2773
2774     # changes to the hypervisor list
2775     if self.op.enabled_hypervisors is not None:
2776       self.hv_list = self.op.enabled_hypervisors
2777       for hv in self.hv_list:
2778         # if the hypervisor doesn't already exist in the cluster
2779         # hvparams, we initialize it to empty, and then (in both
2780         # cases) we make sure to fill the defaults, as we might not
2781         # have a complete defaults list if the hypervisor wasn't
2782         # enabled before
2783         if hv not in new_hvp:
2784           new_hvp[hv] = {}
2785         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2786         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2787     else:
2788       self.hv_list = cluster.enabled_hypervisors
2789
2790     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2791       # either the enabled list has changed, or the parameters have, validate
2792       for hv_name, hv_params in self.new_hvparams.items():
2793         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2794             (self.op.enabled_hypervisors and
2795              hv_name in self.op.enabled_hypervisors)):
2796           # either this is a new hypervisor, or its parameters have changed
2797           hv_class = hypervisor.GetHypervisor(hv_name)
2798           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2799           hv_class.CheckParameterSyntax(hv_params)
2800           _CheckHVParams(self, node_list, hv_name, hv_params)
2801
2802     if self.op.os_hvp:
2803       # no need to check any newly-enabled hypervisors, since the
2804       # defaults have already been checked in the above code-block
2805       for os_name, os_hvp in self.new_os_hvp.items():
2806         for hv_name, hv_params in os_hvp.items():
2807           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2808           # we need to fill in the new os_hvp on top of the actual hv_p
2809           cluster_defaults = self.new_hvparams.get(hv_name, {})
2810           new_osp = objects.FillDict(cluster_defaults, hv_params)
2811           hv_class = hypervisor.GetHypervisor(hv_name)
2812           hv_class.CheckParameterSyntax(new_osp)
2813           _CheckHVParams(self, node_list, hv_name, new_osp)
2814
2815     if self.op.default_iallocator:
2816       alloc_script = utils.FindFile(self.op.default_iallocator,
2817                                     constants.IALLOCATOR_SEARCH_PATH,
2818                                     os.path.isfile)
2819       if alloc_script is None:
2820         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2821                                    " specified" % self.op.default_iallocator,
2822                                    errors.ECODE_INVAL)
2823
2824   def Exec(self, feedback_fn):
2825     """Change the parameters of the cluster.
2826
2827     """
2828     if self.op.vg_name is not None:
2829       new_volume = self.op.vg_name
2830       if not new_volume:
2831         new_volume = None
2832       if new_volume != self.cfg.GetVGName():
2833         self.cfg.SetVGName(new_volume)
2834       else:
2835         feedback_fn("Cluster LVM configuration already in desired"
2836                     " state, not changing")
2837     if self.op.drbd_helper is not None:
2838       new_helper = self.op.drbd_helper
2839       if not new_helper:
2840         new_helper = None
2841       if new_helper != self.cfg.GetDRBDHelper():
2842         self.cfg.SetDRBDHelper(new_helper)
2843       else:
2844         feedback_fn("Cluster DRBD helper already in desired state,"
2845                     " not changing")
2846     if self.op.hvparams:
2847       self.cluster.hvparams = self.new_hvparams
2848     if self.op.os_hvp:
2849       self.cluster.os_hvp = self.new_os_hvp
2850     if self.op.enabled_hypervisors is not None:
2851       self.cluster.hvparams = self.new_hvparams
2852       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2853     if self.op.beparams:
2854       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2855     if self.op.nicparams:
2856       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2857     if self.op.osparams:
2858       self.cluster.osparams = self.new_osp
2859
2860     if self.op.candidate_pool_size is not None:
2861       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2862       # we need to update the pool size here, otherwise the save will fail
2863       _AdjustCandidatePool(self, [])
2864
2865     if self.op.maintain_node_health is not None:
2866       self.cluster.maintain_node_health = self.op.maintain_node_health
2867
2868     if self.op.add_uids is not None:
2869       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2870
2871     if self.op.remove_uids is not None:
2872       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2873
2874     if self.op.uid_pool is not None:
2875       self.cluster.uid_pool = self.op.uid_pool
2876
2877     if self.op.default_iallocator is not None:
2878       self.cluster.default_iallocator = self.op.default_iallocator
2879
2880     if self.op.reserved_lvs is not None:
2881       self.cluster.reserved_lvs = self.op.reserved_lvs
2882
2883     self.cfg.Update(self.cluster, feedback_fn)
2884
2885
2886 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2887   """Distribute additional files which are part of the cluster configuration.
2888
2889   ConfigWriter takes care of distributing the config and ssconf files, but
2890   there are more files which should be distributed to all nodes. This function
2891   makes sure those are copied.
2892
2893   @param lu: calling logical unit
2894   @param additional_nodes: list of nodes not in the config to distribute to
2895
2896   """
2897   # 1. Gather target nodes
2898   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2899   dist_nodes = lu.cfg.GetOnlineNodeList()
2900   if additional_nodes is not None:
2901     dist_nodes.extend(additional_nodes)
2902   if myself.name in dist_nodes:
2903     dist_nodes.remove(myself.name)
2904
2905   # 2. Gather files to distribute
2906   dist_files = set([constants.ETC_HOSTS,
2907                     constants.SSH_KNOWN_HOSTS_FILE,
2908                     constants.RAPI_CERT_FILE,
2909                     constants.RAPI_USERS_FILE,
2910                     constants.CONFD_HMAC_KEY,
2911                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2912                    ])
2913
2914   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2915   for hv_name in enabled_hypervisors:
2916     hv_class = hypervisor.GetHypervisor(hv_name)
2917     dist_files.update(hv_class.GetAncillaryFiles())
2918
2919   # 3. Perform the files upload
2920   for fname in dist_files:
2921     if os.path.exists(fname):
2922       result = lu.rpc.call_upload_file(dist_nodes, fname)
2923       for to_node, to_result in result.items():
2924         msg = to_result.fail_msg
2925         if msg:
2926           msg = ("Copy of file %s to node %s failed: %s" %
2927                  (fname, to_node, msg))
2928           lu.proc.LogWarning(msg)
2929
2930
2931 class LURedistributeConfig(NoHooksLU):
2932   """Force the redistribution of cluster configuration.
2933
2934   This is a very simple LU.
2935
2936   """
2937   REQ_BGL = False
2938
2939   def ExpandNames(self):
2940     self.needed_locks = {
2941       locking.LEVEL_NODE: locking.ALL_SET,
2942     }
2943     self.share_locks[locking.LEVEL_NODE] = 1
2944
2945   def Exec(self, feedback_fn):
2946     """Redistribute the configuration.
2947
2948     """
2949     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2950     _RedistributeAncillaryFiles(self)
2951
2952
2953 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2954   """Sleep and poll for an instance's disk to sync.
2955
2956   """
2957   if not instance.disks or disks is not None and not disks:
2958     return True
2959
2960   disks = _ExpandCheckDisks(instance, disks)
2961
2962   if not oneshot:
2963     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2964
2965   node = instance.primary_node
2966
2967   for dev in disks:
2968     lu.cfg.SetDiskID(dev, node)
2969
2970   # TODO: Convert to utils.Retry
2971
2972   retries = 0
2973   degr_retries = 10 # in seconds, as we sleep 1 second each time
2974   while True:
2975     max_time = 0
2976     done = True
2977     cumul_degraded = False
2978     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2979     msg = rstats.fail_msg
2980     if msg:
2981       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2982       retries += 1
2983       if retries >= 10:
2984         raise errors.RemoteError("Can't contact node %s for mirror data,"
2985                                  " aborting." % node)
2986       time.sleep(6)
2987       continue
2988     rstats = rstats.payload
2989     retries = 0
2990     for i, mstat in enumerate(rstats):
2991       if mstat is None:
2992         lu.LogWarning("Can't compute data for node %s/%s",
2993                            node, disks[i].iv_name)
2994         continue
2995
2996       cumul_degraded = (cumul_degraded or
2997                         (mstat.is_degraded and mstat.sync_percent is None))
2998       if mstat.sync_percent is not None:
2999         done = False
3000         if mstat.estimated_time is not None:
3001           rem_time = ("%s remaining (estimated)" %
3002                       utils.FormatSeconds(mstat.estimated_time))
3003           max_time = mstat.estimated_time
3004         else:
3005           rem_time = "no time estimate"
3006         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3007                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3008
3009     # if we're done but degraded, let's do a few small retries, to
3010     # make sure we see a stable and not transient situation; therefore
3011     # we force restart of the loop
3012     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3013       logging.info("Degraded disks found, %d retries left", degr_retries)
3014       degr_retries -= 1
3015       time.sleep(1)
3016       continue
3017
3018     if done or oneshot:
3019       break
3020
3021     time.sleep(min(60, max_time))
3022
3023   if done:
3024     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3025   return not cumul_degraded
3026
3027
3028 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3029   """Check that mirrors are not degraded.
3030
3031   The ldisk parameter, if True, will change the test from the
3032   is_degraded attribute (which represents overall non-ok status for
3033   the device(s)) to the ldisk (representing the local storage status).
3034
3035   """
3036   lu.cfg.SetDiskID(dev, node)
3037
3038   result = True
3039
3040   if on_primary or dev.AssembleOnSecondary():
3041     rstats = lu.rpc.call_blockdev_find(node, dev)
3042     msg = rstats.fail_msg
3043     if msg:
3044       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3045       result = False
3046     elif not rstats.payload:
3047       lu.LogWarning("Can't find disk on node %s", node)
3048       result = False
3049     else:
3050       if ldisk:
3051         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3052       else:
3053         result = result and not rstats.payload.is_degraded
3054
3055   if dev.children:
3056     for child in dev.children:
3057       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3058
3059   return result
3060
3061
3062 class LUDiagnoseOS(NoHooksLU):
3063   """Logical unit for OS diagnose/query.
3064
3065   """
3066   _OP_PARAMS = [
3067     _POutputFields,
3068     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3069     ]
3070   REQ_BGL = False
3071   _FIELDS_STATIC = utils.FieldSet()
3072   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3073                                    "parameters", "api_versions")
3074
3075   def CheckArguments(self):
3076     if self.op.names:
3077       raise errors.OpPrereqError("Selective OS query not supported",
3078                                  errors.ECODE_INVAL)
3079
3080     _CheckOutputFields(static=self._FIELDS_STATIC,
3081                        dynamic=self._FIELDS_DYNAMIC,
3082                        selected=self.op.output_fields)
3083
3084   def ExpandNames(self):
3085     # Lock all nodes, in shared mode
3086     # Temporary removal of locks, should be reverted later
3087     # TODO: reintroduce locks when they are lighter-weight
3088     self.needed_locks = {}
3089     #self.share_locks[locking.LEVEL_NODE] = 1
3090     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3091
3092   @staticmethod
3093   def _DiagnoseByOS(rlist):
3094     """Remaps a per-node return list into an a per-os per-node dictionary
3095
3096     @param rlist: a map with node names as keys and OS objects as values
3097
3098     @rtype: dict
3099     @return: a dictionary with osnames as keys and as value another
3100         map, with nodes as keys and tuples of (path, status, diagnose,
3101         variants, parameters, api_versions) as values, eg::
3102
3103           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3104                                      (/srv/..., False, "invalid api")],
3105                            "node2": [(/srv/..., True, "", [], [])]}
3106           }
3107
3108     """
3109     all_os = {}
3110     # we build here the list of nodes that didn't fail the RPC (at RPC
3111     # level), so that nodes with a non-responding node daemon don't
3112     # make all OSes invalid
3113     good_nodes = [node_name for node_name in rlist
3114                   if not rlist[node_name].fail_msg]
3115     for node_name, nr in rlist.items():
3116       if nr.fail_msg or not nr.payload:
3117         continue
3118       for (name, path, status, diagnose, variants,
3119            params, api_versions) in nr.payload:
3120         if name not in all_os:
3121           # build a list of nodes for this os containing empty lists
3122           # for each node in node_list
3123           all_os[name] = {}
3124           for nname in good_nodes:
3125             all_os[name][nname] = []
3126         # convert params from [name, help] to (name, help)
3127         params = [tuple(v) for v in params]
3128         all_os[name][node_name].append((path, status, diagnose,
3129                                         variants, params, api_versions))
3130     return all_os
3131
3132   def Exec(self, feedback_fn):
3133     """Compute the list of OSes.
3134
3135     """
3136     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3137     node_data = self.rpc.call_os_diagnose(valid_nodes)
3138     pol = self._DiagnoseByOS(node_data)
3139     output = []
3140
3141     for os_name, os_data in pol.items():
3142       row = []
3143       valid = True
3144       (variants, params, api_versions) = null_state = (set(), set(), set())
3145       for idx, osl in enumerate(os_data.values()):
3146         valid = bool(valid and osl and osl[0][1])
3147         if not valid:
3148           (variants, params, api_versions) = null_state
3149           break
3150         node_variants, node_params, node_api = osl[0][3:6]
3151         if idx == 0: # first entry
3152           variants = set(node_variants)
3153           params = set(node_params)
3154           api_versions = set(node_api)
3155         else: # keep consistency
3156           variants.intersection_update(node_variants)
3157           params.intersection_update(node_params)
3158           api_versions.intersection_update(node_api)
3159
3160       for field in self.op.output_fields:
3161         if field == "name":
3162           val = os_name
3163         elif field == "valid":
3164           val = valid
3165         elif field == "node_status":
3166           # this is just a copy of the dict
3167           val = {}
3168           for node_name, nos_list in os_data.items():
3169             val[node_name] = nos_list
3170         elif field == "variants":
3171           val = list(variants)
3172         elif field == "parameters":
3173           val = list(params)
3174         elif field == "api_versions":
3175           val = list(api_versions)
3176         else:
3177           raise errors.ParameterError(field)
3178         row.append(val)
3179       output.append(row)
3180
3181     return output
3182
3183
3184 class LURemoveNode(LogicalUnit):
3185   """Logical unit for removing a node.
3186
3187   """
3188   HPATH = "node-remove"
3189   HTYPE = constants.HTYPE_NODE
3190   _OP_PARAMS = [
3191     _PNodeName,
3192     ]
3193
3194   def BuildHooksEnv(self):
3195     """Build hooks env.
3196
3197     This doesn't run on the target node in the pre phase as a failed
3198     node would then be impossible to remove.
3199
3200     """
3201     env = {
3202       "OP_TARGET": self.op.node_name,
3203       "NODE_NAME": self.op.node_name,
3204       }
3205     all_nodes = self.cfg.GetNodeList()
3206     try:
3207       all_nodes.remove(self.op.node_name)
3208     except ValueError:
3209       logging.warning("Node %s which is about to be removed not found"
3210                       " in the all nodes list", self.op.node_name)
3211     return env, all_nodes, all_nodes
3212
3213   def CheckPrereq(self):
3214     """Check prerequisites.
3215
3216     This checks:
3217      - the node exists in the configuration
3218      - it does not have primary or secondary instances
3219      - it's not the master
3220
3221     Any errors are signaled by raising errors.OpPrereqError.
3222
3223     """
3224     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3225     node = self.cfg.GetNodeInfo(self.op.node_name)
3226     assert node is not None
3227
3228     instance_list = self.cfg.GetInstanceList()
3229
3230     masternode = self.cfg.GetMasterNode()
3231     if node.name == masternode:
3232       raise errors.OpPrereqError("Node is the master node,"
3233                                  " you need to failover first.",
3234                                  errors.ECODE_INVAL)
3235
3236     for instance_name in instance_list:
3237       instance = self.cfg.GetInstanceInfo(instance_name)
3238       if node.name in instance.all_nodes:
3239         raise errors.OpPrereqError("Instance %s is still running on the node,"
3240                                    " please remove first." % instance_name,
3241                                    errors.ECODE_INVAL)
3242     self.op.node_name = node.name
3243     self.node = node
3244
3245   def Exec(self, feedback_fn):
3246     """Removes the node from the cluster.
3247
3248     """
3249     node = self.node
3250     logging.info("Stopping the node daemon and removing configs from node %s",
3251                  node.name)
3252
3253     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3254
3255     # Promote nodes to master candidate as needed
3256     _AdjustCandidatePool(self, exceptions=[node.name])
3257     self.context.RemoveNode(node.name)
3258
3259     # Run post hooks on the node before it's removed
3260     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3261     try:
3262       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3263     except:
3264       # pylint: disable-msg=W0702
3265       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3266
3267     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3268     msg = result.fail_msg
3269     if msg:
3270       self.LogWarning("Errors encountered on the remote node while leaving"
3271                       " the cluster: %s", msg)
3272
3273     # Remove node from our /etc/hosts
3274     if self.cfg.GetClusterInfo().modify_etc_hosts:
3275       # FIXME: this should be done via an rpc call to node daemon
3276       utils.RemoveHostFromEtcHosts(node.name)
3277       _RedistributeAncillaryFiles(self)
3278
3279
3280 class LUQueryNodes(NoHooksLU):
3281   """Logical unit for querying nodes.
3282
3283   """
3284   # pylint: disable-msg=W0142
3285   _OP_PARAMS = [
3286     _POutputFields,
3287     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3288     ("use_locking", False, _TBool),
3289     ]
3290   REQ_BGL = False
3291
3292   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3293                     "master_candidate", "offline", "drained"]
3294
3295   _FIELDS_DYNAMIC = utils.FieldSet(
3296     "dtotal", "dfree",
3297     "mtotal", "mnode", "mfree",
3298     "bootid",
3299     "ctotal", "cnodes", "csockets",
3300     )
3301
3302   _FIELDS_STATIC = utils.FieldSet(*[
3303     "pinst_cnt", "sinst_cnt",
3304     "pinst_list", "sinst_list",
3305     "pip", "sip", "tags",
3306     "master",
3307     "role"] + _SIMPLE_FIELDS
3308     )
3309
3310   def CheckArguments(self):
3311     _CheckOutputFields(static=self._FIELDS_STATIC,
3312                        dynamic=self._FIELDS_DYNAMIC,
3313                        selected=self.op.output_fields)
3314
3315   def ExpandNames(self):
3316     self.needed_locks = {}
3317     self.share_locks[locking.LEVEL_NODE] = 1
3318
3319     if self.op.names:
3320       self.wanted = _GetWantedNodes(self, self.op.names)
3321     else:
3322       self.wanted = locking.ALL_SET
3323
3324     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3325     self.do_locking = self.do_node_query and self.op.use_locking
3326     if self.do_locking:
3327       # if we don't request only static fields, we need to lock the nodes
3328       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3329
3330   def Exec(self, feedback_fn):
3331     """Computes the list of nodes and their attributes.
3332
3333     """
3334     all_info = self.cfg.GetAllNodesInfo()
3335     if self.do_locking:
3336       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3337     elif self.wanted != locking.ALL_SET:
3338       nodenames = self.wanted
3339       missing = set(nodenames).difference(all_info.keys())
3340       if missing:
3341         raise errors.OpExecError(
3342           "Some nodes were removed before retrieving their data: %s" % missing)
3343     else:
3344       nodenames = all_info.keys()
3345
3346     nodenames = utils.NiceSort(nodenames)
3347     nodelist = [all_info[name] for name in nodenames]
3348
3349     # begin data gathering
3350
3351     if self.do_node_query:
3352       live_data = {}
3353       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3354                                           self.cfg.GetHypervisorType())
3355       for name in nodenames:
3356         nodeinfo = node_data[name]
3357         if not nodeinfo.fail_msg and nodeinfo.payload:
3358           nodeinfo = nodeinfo.payload
3359           fn = utils.TryConvert
3360           live_data[name] = {
3361             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3362             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3363             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3364             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3365             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3366             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3367             "bootid": nodeinfo.get('bootid', None),
3368             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3369             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3370             }
3371         else:
3372           live_data[name] = {}
3373     else:
3374       live_data = dict.fromkeys(nodenames, {})
3375
3376     node_to_primary = dict([(name, set()) for name in nodenames])
3377     node_to_secondary = dict([(name, set()) for name in nodenames])
3378
3379     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3380                              "sinst_cnt", "sinst_list"))
3381     if inst_fields & frozenset(self.op.output_fields):
3382       inst_data = self.cfg.GetAllInstancesInfo()
3383
3384       for inst in inst_data.values():
3385         if inst.primary_node in node_to_primary:
3386           node_to_primary[inst.primary_node].add(inst.name)
3387         for secnode in inst.secondary_nodes:
3388           if secnode in node_to_secondary:
3389             node_to_secondary[secnode].add(inst.name)
3390
3391     master_node = self.cfg.GetMasterNode()
3392
3393     # end data gathering
3394
3395     output = []
3396     for node in nodelist:
3397       node_output = []
3398       for field in self.op.output_fields:
3399         if field in self._SIMPLE_FIELDS:
3400           val = getattr(node, field)
3401         elif field == "pinst_list":
3402           val = list(node_to_primary[node.name])
3403         elif field == "sinst_list":
3404           val = list(node_to_secondary[node.name])
3405         elif field == "pinst_cnt":
3406           val = len(node_to_primary[node.name])
3407         elif field == "sinst_cnt":
3408           val = len(node_to_secondary[node.name])
3409         elif field == "pip":
3410           val = node.primary_ip
3411         elif field == "sip":
3412           val = node.secondary_ip
3413         elif field == "tags":
3414           val = list(node.GetTags())
3415         elif field == "master":
3416           val = node.name == master_node
3417         elif self._FIELDS_DYNAMIC.Matches(field):
3418           val = live_data[node.name].get(field, None)
3419         elif field == "role":
3420           if node.name == master_node:
3421             val = "M"
3422           elif node.master_candidate:
3423             val = "C"
3424           elif node.drained:
3425             val = "D"
3426           elif node.offline:
3427             val = "O"
3428           else:
3429             val = "R"
3430         else:
3431           raise errors.ParameterError(field)
3432         node_output.append(val)
3433       output.append(node_output)
3434
3435     return output
3436
3437
3438 class LUQueryNodeVolumes(NoHooksLU):
3439   """Logical unit for getting volumes on node(s).
3440
3441   """
3442   _OP_PARAMS = [
3443     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3444     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3445     ]
3446   REQ_BGL = False
3447   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3448   _FIELDS_STATIC = utils.FieldSet("node")
3449
3450   def CheckArguments(self):
3451     _CheckOutputFields(static=self._FIELDS_STATIC,
3452                        dynamic=self._FIELDS_DYNAMIC,
3453                        selected=self.op.output_fields)
3454
3455   def ExpandNames(self):
3456     self.needed_locks = {}
3457     self.share_locks[locking.LEVEL_NODE] = 1
3458     if not self.op.nodes:
3459       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3460     else:
3461       self.needed_locks[locking.LEVEL_NODE] = \
3462         _GetWantedNodes(self, self.op.nodes)
3463
3464   def Exec(self, feedback_fn):
3465     """Computes the list of nodes and their attributes.
3466
3467     """
3468     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3469     volumes = self.rpc.call_node_volumes(nodenames)
3470
3471     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3472              in self.cfg.GetInstanceList()]
3473
3474     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3475
3476     output = []
3477     for node in nodenames:
3478       nresult = volumes[node]
3479       if nresult.offline:
3480         continue
3481       msg = nresult.fail_msg
3482       if msg:
3483         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3484         continue
3485
3486       node_vols = nresult.payload[:]
3487       node_vols.sort(key=lambda vol: vol['dev'])
3488
3489       for vol in node_vols:
3490         node_output = []
3491         for field in self.op.output_fields:
3492           if field == "node":
3493             val = node
3494           elif field == "phys":
3495             val = vol['dev']
3496           elif field == "vg":
3497             val = vol['vg']
3498           elif field == "name":
3499             val = vol['name']
3500           elif field == "size":
3501             val = int(float(vol['size']))
3502           elif field == "instance":
3503             for inst in ilist:
3504               if node not in lv_by_node[inst]:
3505                 continue
3506               if vol['name'] in lv_by_node[inst][node]:
3507                 val = inst.name
3508                 break
3509             else:
3510               val = '-'
3511           else:
3512             raise errors.ParameterError(field)
3513           node_output.append(str(val))
3514
3515         output.append(node_output)
3516
3517     return output
3518
3519
3520 class LUQueryNodeStorage(NoHooksLU):
3521   """Logical unit for getting information on storage units on node(s).
3522
3523   """
3524   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3525   _OP_PARAMS = [
3526     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3527     ("storage_type", _NoDefault, _CheckStorageType),
3528     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3529     ("name", None, _TMaybeString),
3530     ]
3531   REQ_BGL = False
3532
3533   def CheckArguments(self):
3534     _CheckOutputFields(static=self._FIELDS_STATIC,
3535                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3536                        selected=self.op.output_fields)
3537
3538   def ExpandNames(self):
3539     self.needed_locks = {}
3540     self.share_locks[locking.LEVEL_NODE] = 1
3541
3542     if self.op.nodes:
3543       self.needed_locks[locking.LEVEL_NODE] = \
3544         _GetWantedNodes(self, self.op.nodes)
3545     else:
3546       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3547
3548   def Exec(self, feedback_fn):
3549     """Computes the list of nodes and their attributes.
3550
3551     """
3552     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3553
3554     # Always get name to sort by
3555     if constants.SF_NAME in self.op.output_fields:
3556       fields = self.op.output_fields[:]
3557     else:
3558       fields = [constants.SF_NAME] + self.op.output_fields
3559
3560     # Never ask for node or type as it's only known to the LU
3561     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3562       while extra in fields:
3563         fields.remove(extra)
3564
3565     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3566     name_idx = field_idx[constants.SF_NAME]
3567
3568     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3569     data = self.rpc.call_storage_list(self.nodes,
3570                                       self.op.storage_type, st_args,
3571                                       self.op.name, fields)
3572
3573     result = []
3574
3575     for node in utils.NiceSort(self.nodes):
3576       nresult = data[node]
3577       if nresult.offline:
3578         continue
3579
3580       msg = nresult.fail_msg
3581       if msg:
3582         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3583         continue
3584
3585       rows = dict([(row[name_idx], row) for row in nresult.payload])
3586
3587       for name in utils.NiceSort(rows.keys()):
3588         row = rows[name]
3589
3590         out = []
3591
3592         for field in self.op.output_fields:
3593           if field == constants.SF_NODE:
3594             val = node
3595           elif field == constants.SF_TYPE:
3596             val = self.op.storage_type
3597           elif field in field_idx:
3598             val = row[field_idx[field]]
3599           else:
3600             raise errors.ParameterError(field)
3601
3602           out.append(val)
3603
3604         result.append(out)
3605
3606     return result
3607
3608
3609 class LUModifyNodeStorage(NoHooksLU):
3610   """Logical unit for modifying a storage volume on a node.
3611
3612   """
3613   _OP_PARAMS = [
3614     _PNodeName,
3615     ("storage_type", _NoDefault, _CheckStorageType),
3616     ("name", _NoDefault, _TNonEmptyString),
3617     ("changes", _NoDefault, _TDict),
3618     ]
3619   REQ_BGL = False
3620
3621   def CheckArguments(self):
3622     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3623
3624     storage_type = self.op.storage_type
3625
3626     try:
3627       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3628     except KeyError:
3629       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3630                                  " modified" % storage_type,
3631                                  errors.ECODE_INVAL)
3632
3633     diff = set(self.op.changes.keys()) - modifiable
3634     if diff:
3635       raise errors.OpPrereqError("The following fields can not be modified for"
3636                                  " storage units of type '%s': %r" %
3637                                  (storage_type, list(diff)),
3638                                  errors.ECODE_INVAL)
3639
3640   def ExpandNames(self):
3641     self.needed_locks = {
3642       locking.LEVEL_NODE: self.op.node_name,
3643       }
3644
3645   def Exec(self, feedback_fn):
3646     """Computes the list of nodes and their attributes.
3647
3648     """
3649     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3650     result = self.rpc.call_storage_modify(self.op.node_name,
3651                                           self.op.storage_type, st_args,
3652                                           self.op.name, self.op.changes)
3653     result.Raise("Failed to modify storage unit '%s' on %s" %
3654                  (self.op.name, self.op.node_name))
3655
3656
3657 class LUAddNode(LogicalUnit):
3658   """Logical unit for adding node to the cluster.
3659
3660   """
3661   HPATH = "node-add"
3662   HTYPE = constants.HTYPE_NODE
3663   _OP_PARAMS = [
3664     _PNodeName,
3665     ("primary_ip", None, _NoType),
3666     ("secondary_ip", None, _TMaybeString),
3667     ("readd", False, _TBool),
3668     ]
3669
3670   def CheckArguments(self):
3671     # validate/normalize the node name
3672     self.hostname = netutils.GetHostname(name=self.op.node_name,
3673                                          family=self.cfg.GetPrimaryIPFamily())
3674     self.op.node_name = self.hostname.name
3675
3676   def BuildHooksEnv(self):
3677     """Build hooks env.
3678
3679     This will run on all nodes before, and on all nodes + the new node after.
3680
3681     """
3682     env = {
3683       "OP_TARGET": self.op.node_name,
3684       "NODE_NAME": self.op.node_name,
3685       "NODE_PIP": self.op.primary_ip,
3686       "NODE_SIP": self.op.secondary_ip,
3687       }
3688     nodes_0 = self.cfg.GetNodeList()
3689     nodes_1 = nodes_0 + [self.op.node_name, ]
3690     return env, nodes_0, nodes_1
3691
3692   def CheckPrereq(self):
3693     """Check prerequisites.
3694
3695     This checks:
3696      - the new node is not already in the config
3697      - it is resolvable
3698      - its parameters (single/dual homed) matches the cluster
3699
3700     Any errors are signaled by raising errors.OpPrereqError.
3701
3702     """
3703     cfg = self.cfg
3704     hostname = self.hostname
3705     node = hostname.name
3706     primary_ip = self.op.primary_ip = hostname.ip
3707     if self.op.secondary_ip is None:
3708       self.op.secondary_ip = primary_ip
3709
3710     secondary_ip = self.op.secondary_ip
3711     if not netutils.IP4Address.IsValid(secondary_ip):
3712       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3713                                  " address" % secondary_ip, errors.ECODE_INVAL)
3714
3715     node_list = cfg.GetNodeList()
3716     if not self.op.readd and node in node_list:
3717       raise errors.OpPrereqError("Node %s is already in the configuration" %
3718                                  node, errors.ECODE_EXISTS)
3719     elif self.op.readd and node not in node_list:
3720       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3721                                  errors.ECODE_NOENT)
3722
3723     self.changed_primary_ip = False
3724
3725     for existing_node_name in node_list:
3726       existing_node = cfg.GetNodeInfo(existing_node_name)
3727
3728       if self.op.readd and node == existing_node_name:
3729         if existing_node.secondary_ip != secondary_ip:
3730           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3731                                      " address configuration as before",
3732                                      errors.ECODE_INVAL)
3733         if existing_node.primary_ip != primary_ip:
3734           self.changed_primary_ip = True
3735
3736         continue
3737
3738       if (existing_node.primary_ip == primary_ip or
3739           existing_node.secondary_ip == primary_ip or
3740           existing_node.primary_ip == secondary_ip or
3741           existing_node.secondary_ip == secondary_ip):
3742         raise errors.OpPrereqError("New node ip address(es) conflict with"
3743                                    " existing node %s" % existing_node.name,
3744                                    errors.ECODE_NOTUNIQUE)
3745
3746     # check that the type of the node (single versus dual homed) is the
3747     # same as for the master
3748     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3749     master_singlehomed = myself.secondary_ip == myself.primary_ip
3750     newbie_singlehomed = secondary_ip == primary_ip
3751     if master_singlehomed != newbie_singlehomed:
3752       if master_singlehomed:
3753         raise errors.OpPrereqError("The master has no private ip but the"
3754                                    " new node has one",
3755                                    errors.ECODE_INVAL)
3756       else:
3757         raise errors.OpPrereqError("The master has a private ip but the"
3758                                    " new node doesn't have one",
3759                                    errors.ECODE_INVAL)
3760
3761     # checks reachability
3762     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3763       raise errors.OpPrereqError("Node not reachable by ping",
3764                                  errors.ECODE_ENVIRON)
3765
3766     if not newbie_singlehomed:
3767       # check reachability from my secondary ip to newbie's secondary ip
3768       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3769                            source=myself.secondary_ip):
3770         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3771                                    " based ping to noded port",
3772                                    errors.ECODE_ENVIRON)
3773
3774     if self.op.readd:
3775       exceptions = [node]
3776     else:
3777       exceptions = []
3778
3779     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3780
3781     if self.op.readd:
3782       self.new_node = self.cfg.GetNodeInfo(node)
3783       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3784     else:
3785       self.new_node = objects.Node(name=node,
3786                                    primary_ip=primary_ip,
3787                                    secondary_ip=secondary_ip,
3788                                    master_candidate=self.master_candidate,
3789                                    offline=False, drained=False)
3790
3791   def Exec(self, feedback_fn):
3792     """Adds the new node to the cluster.
3793
3794     """
3795     new_node = self.new_node
3796     node = new_node.name
3797
3798     # for re-adds, reset the offline/drained/master-candidate flags;
3799     # we need to reset here, otherwise offline would prevent RPC calls
3800     # later in the procedure; this also means that if the re-add
3801     # fails, we are left with a non-offlined, broken node
3802     if self.op.readd:
3803       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3804       self.LogInfo("Readding a node, the offline/drained flags were reset")
3805       # if we demote the node, we do cleanup later in the procedure
3806       new_node.master_candidate = self.master_candidate
3807       if self.changed_primary_ip:
3808         new_node.primary_ip = self.op.primary_ip
3809
3810     # notify the user about any possible mc promotion
3811     if new_node.master_candidate:
3812       self.LogInfo("Node will be a master candidate")
3813
3814     # check connectivity
3815     result = self.rpc.call_version([node])[node]
3816     result.Raise("Can't get version information from node %s" % node)
3817     if constants.PROTOCOL_VERSION == result.payload:
3818       logging.info("Communication to node %s fine, sw version %s match",
3819                    node, result.payload)
3820     else:
3821       raise errors.OpExecError("Version mismatch master version %s,"
3822                                " node version %s" %
3823                                (constants.PROTOCOL_VERSION, result.payload))
3824
3825     # Add node to our /etc/hosts, and add key to known_hosts
3826     if self.cfg.GetClusterInfo().modify_etc_hosts:
3827       # FIXME: this should be done via an rpc call to node daemon
3828       utils.AddHostToEtcHosts(self.hostname)
3829
3830     if new_node.secondary_ip != new_node.primary_ip:
3831       result = self.rpc.call_node_has_ip_address(new_node.name,
3832                                                  new_node.secondary_ip)
3833       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3834                    prereq=True, ecode=errors.ECODE_ENVIRON)
3835       if not result.payload:
3836         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3837                                  " you gave (%s). Please fix and re-run this"
3838                                  " command." % new_node.secondary_ip)
3839
3840     node_verify_list = [self.cfg.GetMasterNode()]
3841     node_verify_param = {
3842       constants.NV_NODELIST: [node],
3843       # TODO: do a node-net-test as well?
3844     }
3845
3846     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3847                                        self.cfg.GetClusterName())
3848     for verifier in node_verify_list:
3849       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3850       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3851       if nl_payload:
3852         for failed in nl_payload:
3853           feedback_fn("ssh/hostname verification failed"
3854                       " (checking from %s): %s" %
3855                       (verifier, nl_payload[failed]))
3856         raise errors.OpExecError("ssh/hostname verification failed.")
3857
3858     if self.op.readd:
3859       _RedistributeAncillaryFiles(self)
3860       self.context.ReaddNode(new_node)
3861       # make sure we redistribute the config
3862       self.cfg.Update(new_node, feedback_fn)
3863       # and make sure the new node will not have old files around
3864       if not new_node.master_candidate:
3865         result = self.rpc.call_node_demote_from_mc(new_node.name)
3866         msg = result.fail_msg
3867         if msg:
3868           self.LogWarning("Node failed to demote itself from master"
3869                           " candidate status: %s" % msg)
3870     else:
3871       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3872       self.context.AddNode(new_node, self.proc.GetECId())
3873
3874
3875 class LUSetNodeParams(LogicalUnit):
3876   """Modifies the parameters of a node.
3877
3878   """
3879   HPATH = "node-modify"
3880   HTYPE = constants.HTYPE_NODE
3881   _OP_PARAMS = [
3882     _PNodeName,
3883     ("master_candidate", None, _TMaybeBool),
3884     ("offline", None, _TMaybeBool),
3885     ("drained", None, _TMaybeBool),
3886     ("auto_promote", False, _TBool),
3887     _PForce,
3888     ]
3889   REQ_BGL = False
3890
3891   def CheckArguments(self):
3892     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3893     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3894     if all_mods.count(None) == 3:
3895       raise errors.OpPrereqError("Please pass at least one modification",
3896                                  errors.ECODE_INVAL)
3897     if all_mods.count(True) > 1:
3898       raise errors.OpPrereqError("Can't set the node into more than one"
3899                                  " state at the same time",
3900                                  errors.ECODE_INVAL)
3901
3902     # Boolean value that tells us whether we're offlining or draining the node
3903     self.offline_or_drain = (self.op.offline == True or
3904                              self.op.drained == True)
3905     self.deoffline_or_drain = (self.op.offline == False or
3906                                self.op.drained == False)
3907     self.might_demote = (self.op.master_candidate == False or
3908                          self.offline_or_drain)
3909
3910     self.lock_all = self.op.auto_promote and self.might_demote
3911
3912
3913   def ExpandNames(self):
3914     if self.lock_all:
3915       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3916     else:
3917       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3918
3919   def BuildHooksEnv(self):
3920     """Build hooks env.
3921
3922     This runs on the master node.
3923
3924     """
3925     env = {
3926       "OP_TARGET": self.op.node_name,
3927       "MASTER_CANDIDATE": str(self.op.master_candidate),
3928       "OFFLINE": str(self.op.offline),
3929       "DRAINED": str(self.op.drained),
3930       }
3931     nl = [self.cfg.GetMasterNode(),
3932           self.op.node_name]
3933     return env, nl, nl
3934
3935   def CheckPrereq(self):
3936     """Check prerequisites.
3937
3938     This only checks the instance list against the existing names.
3939
3940     """
3941     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3942
3943     if (self.op.master_candidate is not None or
3944         self.op.drained is not None or
3945         self.op.offline is not None):
3946       # we can't change the master's node flags
3947       if self.op.node_name == self.cfg.GetMasterNode():
3948         raise errors.OpPrereqError("The master role can be changed"
3949                                    " only via master-failover",
3950                                    errors.ECODE_INVAL)
3951
3952
3953     if node.master_candidate and self.might_demote and not self.lock_all:
3954       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3955       # check if after removing the current node, we're missing master
3956       # candidates
3957       (mc_remaining, mc_should, _) = \
3958           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3959       if mc_remaining < mc_should:
3960         raise errors.OpPrereqError("Not enough master candidates, please"
3961                                    " pass auto_promote to allow promotion",
3962                                    errors.ECODE_INVAL)
3963
3964     if (self.op.master_candidate == True and
3965         ((node.offline and not self.op.offline == False) or
3966          (node.drained and not self.op.drained == False))):
3967       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3968                                  " to master_candidate" % node.name,
3969                                  errors.ECODE_INVAL)
3970
3971     # If we're being deofflined/drained, we'll MC ourself if needed
3972     if (self.deoffline_or_drain and not self.offline_or_drain and not
3973         self.op.master_candidate == True and not node.master_candidate):
3974       self.op.master_candidate = _DecideSelfPromotion(self)
3975       if self.op.master_candidate:
3976         self.LogInfo("Autopromoting node to master candidate")
3977
3978     return
3979
3980   def Exec(self, feedback_fn):
3981     """Modifies a node.
3982
3983     """
3984     node = self.node
3985
3986     result = []
3987     changed_mc = False
3988
3989     if self.op.offline is not None:
3990       node.offline = self.op.offline
3991       result.append(("offline", str(self.op.offline)))
3992       if self.op.offline == True:
3993         if node.master_candidate:
3994           node.master_candidate = False
3995           changed_mc = True
3996           result.append(("master_candidate", "auto-demotion due to offline"))
3997         if node.drained:
3998           node.drained = False
3999           result.append(("drained", "clear drained status due to offline"))
4000
4001     if self.op.master_candidate is not None:
4002       node.master_candidate = self.op.master_candidate
4003       changed_mc = True
4004       result.append(("master_candidate", str(self.op.master_candidate)))
4005       if self.op.master_candidate == False:
4006         rrc = self.rpc.call_node_demote_from_mc(node.name)
4007         msg = rrc.fail_msg
4008         if msg:
4009           self.LogWarning("Node failed to demote itself: %s" % msg)
4010
4011     if self.op.drained is not None:
4012       node.drained = self.op.drained
4013       result.append(("drained", str(self.op.drained)))
4014       if self.op.drained == True:
4015         if node.master_candidate:
4016           node.master_candidate = False
4017           changed_mc = True
4018           result.append(("master_candidate", "auto-demotion due to drain"))
4019           rrc = self.rpc.call_node_demote_from_mc(node.name)
4020           msg = rrc.fail_msg
4021           if msg:
4022             self.LogWarning("Node failed to demote itself: %s" % msg)
4023         if node.offline:
4024           node.offline = False
4025           result.append(("offline", "clear offline status due to drain"))
4026
4027     # we locked all nodes, we adjust the CP before updating this node
4028     if self.lock_all:
4029       _AdjustCandidatePool(self, [node.name])
4030
4031     # this will trigger configuration file update, if needed
4032     self.cfg.Update(node, feedback_fn)
4033
4034     # this will trigger job queue propagation or cleanup
4035     if changed_mc:
4036       self.context.ReaddNode(node)
4037
4038     return result
4039
4040
4041 class LUPowercycleNode(NoHooksLU):
4042   """Powercycles a node.
4043
4044   """
4045   _OP_PARAMS = [
4046     _PNodeName,
4047     _PForce,
4048     ]
4049   REQ_BGL = False
4050
4051   def CheckArguments(self):
4052     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4053     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4054       raise errors.OpPrereqError("The node is the master and the force"
4055                                  " parameter was not set",
4056                                  errors.ECODE_INVAL)
4057
4058   def ExpandNames(self):
4059     """Locking for PowercycleNode.
4060
4061     This is a last-resort option and shouldn't block on other
4062     jobs. Therefore, we grab no locks.
4063
4064     """
4065     self.needed_locks = {}
4066
4067   def Exec(self, feedback_fn):
4068     """Reboots a node.
4069
4070     """
4071     result = self.rpc.call_node_powercycle(self.op.node_name,
4072                                            self.cfg.GetHypervisorType())
4073     result.Raise("Failed to schedule the reboot")
4074     return result.payload
4075
4076
4077 class LUQueryClusterInfo(NoHooksLU):
4078   """Query cluster configuration.
4079
4080   """
4081   REQ_BGL = False
4082
4083   def ExpandNames(self):
4084     self.needed_locks = {}
4085
4086   def Exec(self, feedback_fn):
4087     """Return cluster config.
4088
4089     """
4090     cluster = self.cfg.GetClusterInfo()
4091     os_hvp = {}
4092
4093     # Filter just for enabled hypervisors
4094     for os_name, hv_dict in cluster.os_hvp.items():
4095       os_hvp[os_name] = {}
4096       for hv_name, hv_params in hv_dict.items():
4097         if hv_name in cluster.enabled_hypervisors:
4098           os_hvp[os_name][hv_name] = hv_params
4099
4100     # Convert ip_family to ip_version
4101     primary_ip_version = constants.IP4_VERSION
4102     if cluster.primary_ip_family == netutils.IP6Address.family:
4103       primary_ip_version = constants.IP6_VERSION
4104
4105     result = {
4106       "software_version": constants.RELEASE_VERSION,
4107       "protocol_version": constants.PROTOCOL_VERSION,
4108       "config_version": constants.CONFIG_VERSION,
4109       "os_api_version": max(constants.OS_API_VERSIONS),
4110       "export_version": constants.EXPORT_VERSION,
4111       "architecture": (platform.architecture()[0], platform.machine()),
4112       "name": cluster.cluster_name,
4113       "master": cluster.master_node,
4114       "default_hypervisor": cluster.enabled_hypervisors[0],
4115       "enabled_hypervisors": cluster.enabled_hypervisors,
4116       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4117                         for hypervisor_name in cluster.enabled_hypervisors]),
4118       "os_hvp": os_hvp,
4119       "beparams": cluster.beparams,
4120       "osparams": cluster.osparams,
4121       "nicparams": cluster.nicparams,
4122       "candidate_pool_size": cluster.candidate_pool_size,
4123       "master_netdev": cluster.master_netdev,
4124       "volume_group_name": cluster.volume_group_name,
4125       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4126       "file_storage_dir": cluster.file_storage_dir,
4127       "maintain_node_health": cluster.maintain_node_health,
4128       "ctime": cluster.ctime,
4129       "mtime": cluster.mtime,
4130       "uuid": cluster.uuid,
4131       "tags": list(cluster.GetTags()),
4132       "uid_pool": cluster.uid_pool,
4133       "default_iallocator": cluster.default_iallocator,
4134       "reserved_lvs": cluster.reserved_lvs,
4135       "primary_ip_version": primary_ip_version,
4136       }
4137
4138     return result
4139
4140
4141 class LUQueryConfigValues(NoHooksLU):
4142   """Return configuration values.
4143
4144   """
4145   _OP_PARAMS = [_POutputFields]
4146   REQ_BGL = False
4147   _FIELDS_DYNAMIC = utils.FieldSet()
4148   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4149                                   "watcher_pause")
4150
4151   def CheckArguments(self):
4152     _CheckOutputFields(static=self._FIELDS_STATIC,
4153                        dynamic=self._FIELDS_DYNAMIC,
4154                        selected=self.op.output_fields)
4155
4156   def ExpandNames(self):
4157     self.needed_locks = {}
4158
4159   def Exec(self, feedback_fn):
4160     """Dump a representation of the cluster config to the standard output.
4161
4162     """
4163     values = []
4164     for field in self.op.output_fields:
4165       if field == "cluster_name":
4166         entry = self.cfg.GetClusterName()
4167       elif field == "master_node":
4168         entry = self.cfg.GetMasterNode()
4169       elif field == "drain_flag":
4170         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4171       elif field == "watcher_pause":
4172         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4173       else:
4174         raise errors.ParameterError(field)
4175       values.append(entry)
4176     return values
4177
4178
4179 class LUActivateInstanceDisks(NoHooksLU):
4180   """Bring up an instance's disks.
4181
4182   """
4183   _OP_PARAMS = [
4184     _PInstanceName,
4185     ("ignore_size", False, _TBool),
4186     ]
4187   REQ_BGL = False
4188
4189   def ExpandNames(self):
4190     self._ExpandAndLockInstance()
4191     self.needed_locks[locking.LEVEL_NODE] = []
4192     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4193
4194   def DeclareLocks(self, level):
4195     if level == locking.LEVEL_NODE:
4196       self._LockInstancesNodes()
4197
4198   def CheckPrereq(self):
4199     """Check prerequisites.
4200
4201     This checks that the instance is in the cluster.
4202
4203     """
4204     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4205     assert self.instance is not None, \
4206       "Cannot retrieve locked instance %s" % self.op.instance_name
4207     _CheckNodeOnline(self, self.instance.primary_node)
4208
4209   def Exec(self, feedback_fn):
4210     """Activate the disks.
4211
4212     """
4213     disks_ok, disks_info = \
4214               _AssembleInstanceDisks(self, self.instance,
4215                                      ignore_size=self.op.ignore_size)
4216     if not disks_ok:
4217       raise errors.OpExecError("Cannot activate block devices")
4218
4219     return disks_info
4220
4221
4222 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4223                            ignore_size=False):
4224   """Prepare the block devices for an instance.
4225
4226   This sets up the block devices on all nodes.
4227
4228   @type lu: L{LogicalUnit}
4229   @param lu: the logical unit on whose behalf we execute
4230   @type instance: L{objects.Instance}
4231   @param instance: the instance for whose disks we assemble
4232   @type disks: list of L{objects.Disk} or None
4233   @param disks: which disks to assemble (or all, if None)
4234   @type ignore_secondaries: boolean
4235   @param ignore_secondaries: if true, errors on secondary nodes
4236       won't result in an error return from the function
4237   @type ignore_size: boolean
4238   @param ignore_size: if true, the current known size of the disk
4239       will not be used during the disk activation, useful for cases
4240       when the size is wrong
4241   @return: False if the operation failed, otherwise a list of
4242       (host, instance_visible_name, node_visible_name)
4243       with the mapping from node devices to instance devices
4244
4245   """
4246   device_info = []
4247   disks_ok = True
4248   iname = instance.name
4249   disks = _ExpandCheckDisks(instance, disks)
4250
4251   # With the two passes mechanism we try to reduce the window of
4252   # opportunity for the race condition of switching DRBD to primary
4253   # before handshaking occured, but we do not eliminate it
4254
4255   # The proper fix would be to wait (with some limits) until the
4256   # connection has been made and drbd transitions from WFConnection
4257   # into any other network-connected state (Connected, SyncTarget,
4258   # SyncSource, etc.)
4259
4260   # 1st pass, assemble on all nodes in secondary mode
4261   for inst_disk in disks:
4262     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4263       if ignore_size:
4264         node_disk = node_disk.Copy()
4265         node_disk.UnsetSize()
4266       lu.cfg.SetDiskID(node_disk, node)
4267       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4268       msg = result.fail_msg
4269       if msg:
4270         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4271                            " (is_primary=False, pass=1): %s",
4272                            inst_disk.iv_name, node, msg)
4273         if not ignore_secondaries:
4274           disks_ok = False
4275
4276   # FIXME: race condition on drbd migration to primary
4277
4278   # 2nd pass, do only the primary node
4279   for inst_disk in disks:
4280     dev_path = None
4281
4282     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4283       if node != instance.primary_node:
4284         continue
4285       if ignore_size:
4286         node_disk = node_disk.Copy()
4287         node_disk.UnsetSize()
4288       lu.cfg.SetDiskID(node_disk, node)
4289       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4290       msg = result.fail_msg
4291       if msg:
4292         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4293                            " (is_primary=True, pass=2): %s",
4294                            inst_disk.iv_name, node, msg)
4295         disks_ok = False
4296       else:
4297         dev_path = result.payload
4298
4299     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4300
4301   # leave the disks configured for the primary node
4302   # this is a workaround that would be fixed better by
4303   # improving the logical/physical id handling
4304   for disk in disks:
4305     lu.cfg.SetDiskID(disk, instance.primary_node)
4306
4307   return disks_ok, device_info
4308
4309
4310 def _StartInstanceDisks(lu, instance, force):
4311   """Start the disks of an instance.
4312
4313   """
4314   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4315                                            ignore_secondaries=force)
4316   if not disks_ok:
4317     _ShutdownInstanceDisks(lu, instance)
4318     if force is not None and not force:
4319       lu.proc.LogWarning("", hint="If the message above refers to a"
4320                          " secondary node,"
4321                          " you can retry the operation using '--force'.")
4322     raise errors.OpExecError("Disk consistency error")
4323
4324
4325 class LUDeactivateInstanceDisks(NoHooksLU):
4326   """Shutdown an instance's disks.
4327
4328   """
4329   _OP_PARAMS = [
4330     _PInstanceName,
4331     ]
4332   REQ_BGL = False
4333
4334   def ExpandNames(self):
4335     self._ExpandAndLockInstance()
4336     self.needed_locks[locking.LEVEL_NODE] = []
4337     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4338
4339   def DeclareLocks(self, level):
4340     if level == locking.LEVEL_NODE:
4341       self._LockInstancesNodes()
4342
4343   def CheckPrereq(self):
4344     """Check prerequisites.
4345
4346     This checks that the instance is in the cluster.
4347
4348     """
4349     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4350     assert self.instance is not None, \
4351       "Cannot retrieve locked instance %s" % self.op.instance_name
4352
4353   def Exec(self, feedback_fn):
4354     """Deactivate the disks
4355
4356     """
4357     instance = self.instance
4358     _SafeShutdownInstanceDisks(self, instance)
4359
4360
4361 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4362   """Shutdown block devices of an instance.
4363
4364   This function checks if an instance is running, before calling
4365   _ShutdownInstanceDisks.
4366
4367   """
4368   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4369   _ShutdownInstanceDisks(lu, instance, disks=disks)
4370
4371
4372 def _ExpandCheckDisks(instance, disks):
4373   """Return the instance disks selected by the disks list
4374
4375   @type disks: list of L{objects.Disk} or None
4376   @param disks: selected disks
4377   @rtype: list of L{objects.Disk}
4378   @return: selected instance disks to act on
4379
4380   """
4381   if disks is None:
4382     return instance.disks
4383   else:
4384     if not set(disks).issubset(instance.disks):
4385       raise errors.ProgrammerError("Can only act on disks belonging to the"
4386                                    " target instance")
4387     return disks
4388
4389
4390 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4391   """Shutdown block devices of an instance.
4392
4393   This does the shutdown on all nodes of the instance.
4394
4395   If the ignore_primary is false, errors on the primary node are
4396   ignored.
4397
4398   """
4399   all_result = True
4400   disks = _ExpandCheckDisks(instance, disks)
4401
4402   for disk in disks:
4403     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4404       lu.cfg.SetDiskID(top_disk, node)
4405       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4406       msg = result.fail_msg
4407       if msg:
4408         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4409                       disk.iv_name, node, msg)
4410         if not ignore_primary or node != instance.primary_node:
4411           all_result = False
4412   return all_result
4413
4414
4415 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4416   """Checks if a node has enough free memory.
4417
4418   This function check if a given node has the needed amount of free
4419   memory. In case the node has less memory or we cannot get the
4420   information from the node, this function raise an OpPrereqError
4421   exception.
4422
4423   @type lu: C{LogicalUnit}
4424   @param lu: a logical unit from which we get configuration data
4425   @type node: C{str}
4426   @param node: the node to check
4427   @type reason: C{str}
4428   @param reason: string to use in the error message
4429   @type requested: C{int}
4430   @param requested: the amount of memory in MiB to check for
4431   @type hypervisor_name: C{str}
4432   @param hypervisor_name: the hypervisor to ask for memory stats
4433   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4434       we cannot check the node
4435
4436   """
4437   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4438   nodeinfo[node].Raise("Can't get data from node %s" % node,
4439                        prereq=True, ecode=errors.ECODE_ENVIRON)
4440   free_mem = nodeinfo[node].payload.get('memory_free', None)
4441   if not isinstance(free_mem, int):
4442     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4443                                " was '%s'" % (node, free_mem),
4444                                errors.ECODE_ENVIRON)
4445   if requested > free_mem:
4446     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4447                                " needed %s MiB, available %s MiB" %
4448                                (node, reason, requested, free_mem),
4449                                errors.ECODE_NORES)
4450
4451
4452 def _CheckNodesFreeDisk(lu, nodenames, requested):
4453   """Checks if nodes have enough free disk space in the default VG.
4454
4455   This function check if all given nodes have the needed amount of
4456   free disk. In case any node has less disk or we cannot get the
4457   information from the node, this function raise an OpPrereqError
4458   exception.
4459
4460   @type lu: C{LogicalUnit}
4461   @param lu: a logical unit from which we get configuration data
4462   @type nodenames: C{list}
4463   @param nodenames: the list of node names to check
4464   @type requested: C{int}
4465   @param requested: the amount of disk in MiB to check for
4466   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4467       we cannot check the node
4468
4469   """
4470   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4471                                    lu.cfg.GetHypervisorType())
4472   for node in nodenames:
4473     info = nodeinfo[node]
4474     info.Raise("Cannot get current information from node %s" % node,
4475                prereq=True, ecode=errors.ECODE_ENVIRON)
4476     vg_free = info.payload.get("vg_free", None)
4477     if not isinstance(vg_free, int):
4478       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4479                                  " result was '%s'" % (node, vg_free),
4480                                  errors.ECODE_ENVIRON)
4481     if requested > vg_free:
4482       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4483                                  " required %d MiB, available %d MiB" %
4484                                  (node, requested, vg_free),
4485                                  errors.ECODE_NORES)
4486
4487
4488 class LUStartupInstance(LogicalUnit):
4489   """Starts an instance.
4490
4491   """
4492   HPATH = "instance-start"
4493   HTYPE = constants.HTYPE_INSTANCE
4494   _OP_PARAMS = [
4495     _PInstanceName,
4496     _PForce,
4497     ("hvparams", _EmptyDict, _TDict),
4498     ("beparams", _EmptyDict, _TDict),
4499     ]
4500   REQ_BGL = False
4501
4502   def CheckArguments(self):
4503     # extra beparams
4504     if self.op.beparams:
4505       # fill the beparams dict
4506       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4507
4508   def ExpandNames(self):
4509     self._ExpandAndLockInstance()
4510
4511   def BuildHooksEnv(self):
4512     """Build hooks env.
4513
4514     This runs on master, primary and secondary nodes of the instance.
4515
4516     """
4517     env = {
4518       "FORCE": self.op.force,
4519       }
4520     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4521     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4522     return env, nl, nl
4523
4524   def CheckPrereq(self):
4525     """Check prerequisites.
4526
4527     This checks that the instance is in the cluster.
4528
4529     """
4530     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4531     assert self.instance is not None, \
4532       "Cannot retrieve locked instance %s" % self.op.instance_name
4533
4534     # extra hvparams
4535     if self.op.hvparams:
4536       # check hypervisor parameter syntax (locally)
4537       cluster = self.cfg.GetClusterInfo()
4538       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4539       filled_hvp = cluster.FillHV(instance)
4540       filled_hvp.update(self.op.hvparams)
4541       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4542       hv_type.CheckParameterSyntax(filled_hvp)
4543       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4544
4545     _CheckNodeOnline(self, instance.primary_node)
4546
4547     bep = self.cfg.GetClusterInfo().FillBE(instance)
4548     # check bridges existence
4549     _CheckInstanceBridgesExist(self, instance)
4550
4551     remote_info = self.rpc.call_instance_info(instance.primary_node,
4552                                               instance.name,
4553                                               instance.hypervisor)
4554     remote_info.Raise("Error checking node %s" % instance.primary_node,
4555                       prereq=True, ecode=errors.ECODE_ENVIRON)
4556     if not remote_info.payload: # not running already
4557       _CheckNodeFreeMemory(self, instance.primary_node,
4558                            "starting instance %s" % instance.name,
4559                            bep[constants.BE_MEMORY], instance.hypervisor)
4560
4561   def Exec(self, feedback_fn):
4562     """Start the instance.
4563
4564     """
4565     instance = self.instance
4566     force = self.op.force
4567
4568     self.cfg.MarkInstanceUp(instance.name)
4569
4570     node_current = instance.primary_node
4571
4572     _StartInstanceDisks(self, instance, force)
4573
4574     result = self.rpc.call_instance_start(node_current, instance,
4575                                           self.op.hvparams, self.op.beparams)
4576     msg = result.fail_msg
4577     if msg:
4578       _ShutdownInstanceDisks(self, instance)
4579       raise errors.OpExecError("Could not start instance: %s" % msg)
4580
4581
4582 class LURebootInstance(LogicalUnit):
4583   """Reboot an instance.
4584
4585   """
4586   HPATH = "instance-reboot"
4587   HTYPE = constants.HTYPE_INSTANCE
4588   _OP_PARAMS = [
4589     _PInstanceName,
4590     ("ignore_secondaries", False, _TBool),
4591     ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4592     _PShutdownTimeout,
4593     ]
4594   REQ_BGL = False
4595
4596   def ExpandNames(self):
4597     self._ExpandAndLockInstance()
4598
4599   def BuildHooksEnv(self):
4600     """Build hooks env.
4601
4602     This runs on master, primary and secondary nodes of the instance.
4603
4604     """
4605     env = {
4606       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4607       "REBOOT_TYPE": self.op.reboot_type,
4608       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4609       }
4610     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4611     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4612     return env, nl, nl
4613
4614   def CheckPrereq(self):
4615     """Check prerequisites.
4616
4617     This checks that the instance is in the cluster.
4618
4619     """
4620     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4621     assert self.instance is not None, \
4622       "Cannot retrieve locked instance %s" % self.op.instance_name
4623
4624     _CheckNodeOnline(self, instance.primary_node)
4625
4626     # check bridges existence
4627     _CheckInstanceBridgesExist(self, instance)
4628
4629   def Exec(self, feedback_fn):
4630     """Reboot the instance.
4631
4632     """
4633     instance = self.instance
4634     ignore_secondaries = self.op.ignore_secondaries
4635     reboot_type = self.op.reboot_type
4636
4637     node_current = instance.primary_node
4638
4639     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4640                        constants.INSTANCE_REBOOT_HARD]:
4641       for disk in instance.disks:
4642         self.cfg.SetDiskID(disk, node_current)
4643       result = self.rpc.call_instance_reboot(node_current, instance,
4644                                              reboot_type,
4645                                              self.op.shutdown_timeout)
4646       result.Raise("Could not reboot instance")
4647     else:
4648       result = self.rpc.call_instance_shutdown(node_current, instance,
4649                                                self.op.shutdown_timeout)
4650       result.Raise("Could not shutdown instance for full reboot")
4651       _ShutdownInstanceDisks(self, instance)
4652       _StartInstanceDisks(self, instance, ignore_secondaries)
4653       result = self.rpc.call_instance_start(node_current, instance, None, None)
4654       msg = result.fail_msg
4655       if msg:
4656         _ShutdownInstanceDisks(self, instance)
4657         raise errors.OpExecError("Could not start instance for"
4658                                  " full reboot: %s" % msg)
4659
4660     self.cfg.MarkInstanceUp(instance.name)
4661
4662
4663 class LUShutdownInstance(LogicalUnit):
4664   """Shutdown an instance.
4665
4666   """
4667   HPATH = "instance-stop"
4668   HTYPE = constants.HTYPE_INSTANCE
4669   _OP_PARAMS = [
4670     _PInstanceName,
4671     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4672     ]
4673   REQ_BGL = False
4674
4675   def ExpandNames(self):
4676     self._ExpandAndLockInstance()
4677
4678   def BuildHooksEnv(self):
4679     """Build hooks env.
4680
4681     This runs on master, primary and secondary nodes of the instance.
4682
4683     """
4684     env = _BuildInstanceHookEnvByObject(self, self.instance)
4685     env["TIMEOUT"] = self.op.timeout
4686     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4687     return env, nl, nl
4688
4689   def CheckPrereq(self):
4690     """Check prerequisites.
4691
4692     This checks that the instance is in the cluster.
4693
4694     """
4695     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4696     assert self.instance is not None, \
4697       "Cannot retrieve locked instance %s" % self.op.instance_name
4698     _CheckNodeOnline(self, self.instance.primary_node)
4699
4700   def Exec(self, feedback_fn):
4701     """Shutdown the instance.
4702
4703     """
4704     instance = self.instance
4705     node_current = instance.primary_node
4706     timeout = self.op.timeout
4707     self.cfg.MarkInstanceDown(instance.name)
4708     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4709     msg = result.fail_msg
4710     if msg:
4711       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4712
4713     _ShutdownInstanceDisks(self, instance)
4714
4715
4716 class LUReinstallInstance(LogicalUnit):
4717   """Reinstall an instance.
4718
4719   """
4720   HPATH = "instance-reinstall"
4721   HTYPE = constants.HTYPE_INSTANCE
4722   _OP_PARAMS = [
4723     _PInstanceName,
4724     ("os_type", None, _TMaybeString),
4725     ("force_variant", False, _TBool),
4726     ]
4727   REQ_BGL = False
4728
4729   def ExpandNames(self):
4730     self._ExpandAndLockInstance()
4731
4732   def BuildHooksEnv(self):
4733     """Build hooks env.
4734
4735     This runs on master, primary and secondary nodes of the instance.
4736
4737     """
4738     env = _BuildInstanceHookEnvByObject(self, self.instance)
4739     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4740     return env, nl, nl
4741
4742   def CheckPrereq(self):
4743     """Check prerequisites.
4744
4745     This checks that the instance is in the cluster and is not running.
4746
4747     """
4748     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4749     assert instance is not None, \
4750       "Cannot retrieve locked instance %s" % self.op.instance_name
4751     _CheckNodeOnline(self, instance.primary_node)
4752
4753     if instance.disk_template == constants.DT_DISKLESS:
4754       raise errors.OpPrereqError("Instance '%s' has no disks" %
4755                                  self.op.instance_name,
4756                                  errors.ECODE_INVAL)
4757     _CheckInstanceDown(self, instance, "cannot reinstall")
4758
4759     if self.op.os_type is not None:
4760       # OS verification
4761       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4762       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4763
4764     self.instance = instance
4765
4766   def Exec(self, feedback_fn):
4767     """Reinstall the instance.
4768
4769     """
4770     inst = self.instance
4771
4772     if self.op.os_type is not None:
4773       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4774       inst.os = self.op.os_type
4775       self.cfg.Update(inst, feedback_fn)
4776
4777     _StartInstanceDisks(self, inst, None)
4778     try:
4779       feedback_fn("Running the instance OS create scripts...")
4780       # FIXME: pass debug option from opcode to backend
4781       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4782                                              self.op.debug_level)
4783       result.Raise("Could not install OS for instance %s on node %s" %
4784                    (inst.name, inst.primary_node))
4785     finally:
4786       _ShutdownInstanceDisks(self, inst)
4787
4788
4789 class LURecreateInstanceDisks(LogicalUnit):
4790   """Recreate an instance's missing disks.
4791
4792   """
4793   HPATH = "instance-recreate-disks"
4794   HTYPE = constants.HTYPE_INSTANCE
4795   _OP_PARAMS = [
4796     _PInstanceName,
4797     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4798     ]
4799   REQ_BGL = False
4800
4801   def ExpandNames(self):
4802     self._ExpandAndLockInstance()
4803
4804   def BuildHooksEnv(self):
4805     """Build hooks env.
4806
4807     This runs on master, primary and secondary nodes of the instance.
4808
4809     """
4810     env = _BuildInstanceHookEnvByObject(self, self.instance)
4811     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4812     return env, nl, nl
4813
4814   def CheckPrereq(self):
4815     """Check prerequisites.
4816
4817     This checks that the instance is in the cluster and is not running.
4818
4819     """
4820     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4821     assert instance is not None, \
4822       "Cannot retrieve locked instance %s" % self.op.instance_name
4823     _CheckNodeOnline(self, instance.primary_node)
4824
4825     if instance.disk_template == constants.DT_DISKLESS:
4826       raise errors.OpPrereqError("Instance '%s' has no disks" %
4827                                  self.op.instance_name, errors.ECODE_INVAL)
4828     _CheckInstanceDown(self, instance, "cannot recreate disks")
4829
4830     if not self.op.disks:
4831       self.op.disks = range(len(instance.disks))
4832     else:
4833       for idx in self.op.disks:
4834         if idx >= len(instance.disks):
4835           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4836                                      errors.ECODE_INVAL)
4837
4838     self.instance = instance
4839
4840   def Exec(self, feedback_fn):
4841     """Recreate the disks.
4842
4843     """
4844     to_skip = []
4845     for idx, _ in enumerate(self.instance.disks):
4846       if idx not in self.op.disks: # disk idx has not been passed in
4847         to_skip.append(idx)
4848         continue
4849
4850     _CreateDisks(self, self.instance, to_skip=to_skip)
4851
4852
4853 class LURenameInstance(LogicalUnit):
4854   """Rename an instance.
4855
4856   """
4857   HPATH = "instance-rename"
4858   HTYPE = constants.HTYPE_INSTANCE
4859   _OP_PARAMS = [
4860     _PInstanceName,
4861     ("new_name", _NoDefault, _TNonEmptyString),
4862     ("ip_check", False, _TBool),
4863     ("name_check", True, _TBool),
4864     ]
4865
4866   def CheckArguments(self):
4867     """Check arguments.
4868
4869     """
4870     if self.op.ip_check and not self.op.name_check:
4871       # TODO: make the ip check more flexible and not depend on the name check
4872       raise errors.OpPrereqError("Cannot do ip check without a name check",
4873                                  errors.ECODE_INVAL)
4874
4875   def BuildHooksEnv(self):
4876     """Build hooks env.
4877
4878     This runs on master, primary and secondary nodes of the instance.
4879
4880     """
4881     env = _BuildInstanceHookEnvByObject(self, self.instance)
4882     env["INSTANCE_NEW_NAME"] = self.op.new_name
4883     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4884     return env, nl, nl
4885
4886   def CheckPrereq(self):
4887     """Check prerequisites.
4888
4889     This checks that the instance is in the cluster and is not running.
4890
4891     """
4892     self.op.instance_name = _ExpandInstanceName(self.cfg,
4893                                                 self.op.instance_name)
4894     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4895     assert instance is not None
4896     _CheckNodeOnline(self, instance.primary_node)
4897     _CheckInstanceDown(self, instance, "cannot rename")
4898     self.instance = instance
4899
4900     new_name = self.op.new_name
4901     if self.op.name_check:
4902       hostname = netutils.GetHostname(name=new_name)
4903       new_name = hostname.name
4904       if (self.op.ip_check and
4905           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4906         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4907                                    (hostname.ip, new_name),
4908                                    errors.ECODE_NOTUNIQUE)
4909
4910     instance_list = self.cfg.GetInstanceList()
4911     if new_name in instance_list:
4912       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4913                                  new_name, errors.ECODE_EXISTS)
4914
4915   def Exec(self, feedback_fn):
4916     """Reinstall the instance.
4917
4918     """
4919     inst = self.instance
4920     old_name = inst.name
4921
4922     if inst.disk_template == constants.DT_FILE:
4923       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4924
4925     self.cfg.RenameInstance(inst.name, self.op.new_name)
4926     # Change the instance lock. This is definitely safe while we hold the BGL
4927     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4928     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4929
4930     # re-read the instance from the configuration after rename
4931     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4932
4933     if inst.disk_template == constants.DT_FILE:
4934       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4935       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4936                                                      old_file_storage_dir,
4937                                                      new_file_storage_dir)
4938       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4939                    " (but the instance has been renamed in Ganeti)" %
4940                    (inst.primary_node, old_file_storage_dir,
4941                     new_file_storage_dir))
4942
4943     _StartInstanceDisks(self, inst, None)
4944     try:
4945       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4946                                                  old_name, self.op.debug_level)
4947       msg = result.fail_msg
4948       if msg:
4949         msg = ("Could not run OS rename script for instance %s on node %s"
4950                " (but the instance has been renamed in Ganeti): %s" %
4951                (inst.name, inst.primary_node, msg))
4952         self.proc.LogWarning(msg)
4953     finally:
4954       _ShutdownInstanceDisks(self, inst)
4955
4956     return inst.name
4957
4958
4959 class LURemoveInstance(LogicalUnit):
4960   """Remove an instance.
4961
4962   """
4963   HPATH = "instance-remove"
4964   HTYPE = constants.HTYPE_INSTANCE
4965   _OP_PARAMS = [
4966     _PInstanceName,
4967     ("ignore_failures", False, _TBool),
4968     _PShutdownTimeout,
4969     ]
4970   REQ_BGL = False
4971
4972   def ExpandNames(self):
4973     self._ExpandAndLockInstance()
4974     self.needed_locks[locking.LEVEL_NODE] = []
4975     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4976
4977   def DeclareLocks(self, level):
4978     if level == locking.LEVEL_NODE:
4979       self._LockInstancesNodes()
4980
4981   def BuildHooksEnv(self):
4982     """Build hooks env.
4983
4984     This runs on master, primary and secondary nodes of the instance.
4985
4986     """
4987     env = _BuildInstanceHookEnvByObject(self, self.instance)
4988     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4989     nl = [self.cfg.GetMasterNode()]
4990     nl_post = list(self.instance.all_nodes) + nl
4991     return env, nl, nl_post
4992
4993   def CheckPrereq(self):
4994     """Check prerequisites.
4995
4996     This checks that the instance is in the cluster.
4997
4998     """
4999     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5000     assert self.instance is not None, \
5001       "Cannot retrieve locked instance %s" % self.op.instance_name
5002
5003   def Exec(self, feedback_fn):
5004     """Remove the instance.
5005
5006     """
5007     instance = self.instance
5008     logging.info("Shutting down instance %s on node %s",
5009                  instance.name, instance.primary_node)
5010
5011     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5012                                              self.op.shutdown_timeout)
5013     msg = result.fail_msg
5014     if msg:
5015       if self.op.ignore_failures:
5016         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5017       else:
5018         raise errors.OpExecError("Could not shutdown instance %s on"
5019                                  " node %s: %s" %
5020                                  (instance.name, instance.primary_node, msg))
5021
5022     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5023
5024
5025 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5026   """Utility function to remove an instance.
5027
5028   """
5029   logging.info("Removing block devices for instance %s", instance.name)
5030
5031   if not _RemoveDisks(lu, instance):
5032     if not ignore_failures:
5033       raise errors.OpExecError("Can't remove instance's disks")
5034     feedback_fn("Warning: can't remove instance's disks")
5035
5036   logging.info("Removing instance %s out of cluster config", instance.name)
5037
5038   lu.cfg.RemoveInstance(instance.name)
5039
5040   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5041     "Instance lock removal conflict"
5042
5043   # Remove lock for the instance
5044   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5045
5046
5047 class LUQueryInstances(NoHooksLU):
5048   """Logical unit for querying instances.
5049
5050   """
5051   # pylint: disable-msg=W0142
5052   _OP_PARAMS = [
5053     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5054     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5055     ("use_locking", False, _TBool),
5056     ]
5057   REQ_BGL = False
5058   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5059                     "serial_no", "ctime", "mtime", "uuid"]
5060   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5061                                     "admin_state",
5062                                     "disk_template", "ip", "mac", "bridge",
5063                                     "nic_mode", "nic_link",
5064                                     "sda_size", "sdb_size", "vcpus", "tags",
5065                                     "network_port", "beparams",
5066                                     r"(disk)\.(size)/([0-9]+)",
5067                                     r"(disk)\.(sizes)", "disk_usage",
5068                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5069                                     r"(nic)\.(bridge)/([0-9]+)",
5070                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5071                                     r"(disk|nic)\.(count)",
5072                                     "hvparams",
5073                                     ] + _SIMPLE_FIELDS +
5074                                   ["hv/%s" % name
5075                                    for name in constants.HVS_PARAMETERS
5076                                    if name not in constants.HVC_GLOBALS] +
5077                                   ["be/%s" % name
5078                                    for name in constants.BES_PARAMETERS])
5079   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5080                                    "oper_ram",
5081                                    "oper_vcpus",
5082                                    "status")
5083
5084
5085   def CheckArguments(self):
5086     _CheckOutputFields(static=self._FIELDS_STATIC,
5087                        dynamic=self._FIELDS_DYNAMIC,
5088                        selected=self.op.output_fields)
5089
5090   def ExpandNames(self):
5091     self.needed_locks = {}
5092     self.share_locks[locking.LEVEL_INSTANCE] = 1
5093     self.share_locks[locking.LEVEL_NODE] = 1
5094
5095     if self.op.names:
5096       self.wanted = _GetWantedInstances(self, self.op.names)
5097     else:
5098       self.wanted = locking.ALL_SET
5099
5100     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5101     self.do_locking = self.do_node_query and self.op.use_locking
5102     if self.do_locking:
5103       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5104       self.needed_locks[locking.LEVEL_NODE] = []
5105       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5106
5107   def DeclareLocks(self, level):
5108     if level == locking.LEVEL_NODE and self.do_locking:
5109       self._LockInstancesNodes()
5110
5111   def Exec(self, feedback_fn):
5112     """Computes the list of nodes and their attributes.
5113
5114     """
5115     # pylint: disable-msg=R0912
5116     # way too many branches here
5117     all_info = self.cfg.GetAllInstancesInfo()
5118     if self.wanted == locking.ALL_SET:
5119       # caller didn't specify instance names, so ordering is not important
5120       if self.do_locking:
5121         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5122       else:
5123         instance_names = all_info.keys()
5124       instance_names = utils.NiceSort(instance_names)
5125     else:
5126       # caller did specify names, so we must keep the ordering
5127       if self.do_locking:
5128         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5129       else:
5130         tgt_set = all_info.keys()
5131       missing = set(self.wanted).difference(tgt_set)
5132       if missing:
5133         raise errors.OpExecError("Some instances were removed before"
5134                                  " retrieving their data: %s" % missing)
5135       instance_names = self.wanted
5136
5137     instance_list = [all_info[iname] for iname in instance_names]
5138
5139     # begin data gathering
5140
5141     nodes = frozenset([inst.primary_node for inst in instance_list])
5142     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5143
5144     bad_nodes = []
5145     off_nodes = []
5146     if self.do_node_query:
5147       live_data = {}
5148       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5149       for name in nodes:
5150         result = node_data[name]
5151         if result.offline:
5152           # offline nodes will be in both lists
5153           off_nodes.append(name)
5154         if result.fail_msg:
5155           bad_nodes.append(name)
5156         else:
5157           if result.payload:
5158             live_data.update(result.payload)
5159           # else no instance is alive
5160     else:
5161       live_data = dict([(name, {}) for name in instance_names])
5162
5163     # end data gathering
5164
5165     HVPREFIX = "hv/"
5166     BEPREFIX = "be/"
5167     output = []
5168     cluster = self.cfg.GetClusterInfo()
5169     for instance in instance_list:
5170       iout = []
5171       i_hv = cluster.FillHV(instance, skip_globals=True)
5172       i_be = cluster.FillBE(instance)
5173       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5174       for field in self.op.output_fields:
5175         st_match = self._FIELDS_STATIC.Matches(field)
5176         if field in self._SIMPLE_FIELDS:
5177           val = getattr(instance, field)
5178         elif field == "pnode":
5179           val = instance.primary_node
5180         elif field == "snodes":
5181           val = list(instance.secondary_nodes)
5182         elif field == "admin_state":
5183           val = instance.admin_up
5184         elif field == "oper_state":
5185           if instance.primary_node in bad_nodes:
5186             val = None
5187           else:
5188             val = bool(live_data.get(instance.name))
5189         elif field == "status":
5190           if instance.primary_node in off_nodes:
5191             val = "ERROR_nodeoffline"
5192           elif instance.primary_node in bad_nodes:
5193             val = "ERROR_nodedown"
5194           else:
5195             running = bool(live_data.get(instance.name))
5196             if running:
5197               if instance.admin_up:
5198                 val = "running"
5199               else:
5200                 val = "ERROR_up"
5201             else:
5202               if instance.admin_up:
5203                 val = "ERROR_down"
5204               else:
5205                 val = "ADMIN_down"
5206         elif field == "oper_ram":
5207           if instance.primary_node in bad_nodes:
5208             val = None
5209           elif instance.name in live_data:
5210             val = live_data[instance.name].get("memory", "?")
5211           else:
5212             val = "-"
5213         elif field == "oper_vcpus":
5214           if instance.primary_node in bad_nodes:
5215             val = None
5216           elif instance.name in live_data:
5217             val = live_data[instance.name].get("vcpus", "?")
5218           else:
5219             val = "-"
5220         elif field == "vcpus":
5221           val = i_be[constants.BE_VCPUS]
5222         elif field == "disk_template":
5223           val = instance.disk_template
5224         elif field == "ip":
5225           if instance.nics:
5226             val = instance.nics[0].ip
5227           else:
5228             val = None
5229         elif field == "nic_mode":
5230           if instance.nics:
5231             val = i_nicp[0][constants.NIC_MODE]
5232           else:
5233             val = None
5234         elif field == "nic_link":
5235           if instance.nics:
5236             val = i_nicp[0][constants.NIC_LINK]
5237           else:
5238             val = None
5239         elif field == "bridge":
5240           if (instance.nics and
5241               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5242             val = i_nicp[0][constants.NIC_LINK]
5243           else:
5244             val = None
5245         elif field == "mac":
5246           if instance.nics:
5247             val = instance.nics[0].mac
5248           else:
5249             val = None
5250         elif field == "sda_size" or field == "sdb_size":
5251           idx = ord(field[2]) - ord('a')
5252           try:
5253             val = instance.FindDisk(idx).size
5254           except errors.OpPrereqError:
5255             val = None
5256         elif field == "disk_usage": # total disk usage per node
5257           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5258           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5259         elif field == "tags":
5260           val = list(instance.GetTags())
5261         elif field == "hvparams":
5262           val = i_hv
5263         elif (field.startswith(HVPREFIX) and
5264               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5265               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5266           val = i_hv.get(field[len(HVPREFIX):], None)
5267         elif field == "beparams":
5268           val = i_be
5269         elif (field.startswith(BEPREFIX) and
5270               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5271           val = i_be.get(field[len(BEPREFIX):], None)
5272         elif st_match and st_match.groups():
5273           # matches a variable list
5274           st_groups = st_match.groups()
5275           if st_groups and st_groups[0] == "disk":
5276             if st_groups[1] == "count":
5277               val = len(instance.disks)
5278             elif st_groups[1] == "sizes":
5279               val = [disk.size for disk in instance.disks]
5280             elif st_groups[1] == "size":
5281               try:
5282                 val = instance.FindDisk(st_groups[2]).size
5283               except errors.OpPrereqError:
5284                 val = None
5285             else:
5286               assert False, "Unhandled disk parameter"
5287           elif st_groups[0] == "nic":
5288             if st_groups[1] == "count":
5289               val = len(instance.nics)
5290             elif st_groups[1] == "macs":
5291               val = [nic.mac for nic in instance.nics]
5292             elif st_groups[1] == "ips":
5293               val = [nic.ip for nic in instance.nics]
5294             elif st_groups[1] == "modes":
5295               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5296             elif st_groups[1] == "links":
5297               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5298             elif st_groups[1] == "bridges":
5299               val = []
5300               for nicp in i_nicp:
5301                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5302                   val.append(nicp[constants.NIC_LINK])
5303                 else:
5304                   val.append(None)
5305             else:
5306               # index-based item
5307               nic_idx = int(st_groups[2])
5308               if nic_idx >= len(instance.nics):
5309                 val = None
5310               else:
5311                 if st_groups[1] == "mac":
5312                   val = instance.nics[nic_idx].mac
5313                 elif st_groups[1] == "ip":
5314                   val = instance.nics[nic_idx].ip
5315                 elif st_groups[1] == "mode":
5316                   val = i_nicp[nic_idx][constants.NIC_MODE]
5317                 elif st_groups[1] == "link":
5318                   val = i_nicp[nic_idx][constants.NIC_LINK]
5319                 elif st_groups[1] == "bridge":
5320                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5321                   if nic_mode == constants.NIC_MODE_BRIDGED:
5322                     val = i_nicp[nic_idx][constants.NIC_LINK]
5323                   else:
5324                     val = None
5325                 else:
5326                   assert False, "Unhandled NIC parameter"
5327           else:
5328             assert False, ("Declared but unhandled variable parameter '%s'" %
5329                            field)
5330         else:
5331           assert False, "Declared but unhandled parameter '%s'" % field
5332         iout.append(val)
5333       output.append(iout)
5334
5335     return output
5336
5337
5338 class LUFailoverInstance(LogicalUnit):
5339   """Failover an instance.
5340
5341   """
5342   HPATH = "instance-failover"
5343   HTYPE = constants.HTYPE_INSTANCE
5344   _OP_PARAMS = [
5345     _PInstanceName,
5346     ("ignore_consistency", False, _TBool),
5347     _PShutdownTimeout,
5348     ]
5349   REQ_BGL = False
5350
5351   def ExpandNames(self):
5352     self._ExpandAndLockInstance()
5353     self.needed_locks[locking.LEVEL_NODE] = []
5354     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5355
5356   def DeclareLocks(self, level):
5357     if level == locking.LEVEL_NODE:
5358       self._LockInstancesNodes()
5359
5360   def BuildHooksEnv(self):
5361     """Build hooks env.
5362
5363     This runs on master, primary and secondary nodes of the instance.
5364
5365     """
5366     instance = self.instance
5367     source_node = instance.primary_node
5368     target_node = instance.secondary_nodes[0]
5369     env = {
5370       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5371       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5372       "OLD_PRIMARY": source_node,
5373       "OLD_SECONDARY": target_node,
5374       "NEW_PRIMARY": target_node,
5375       "NEW_SECONDARY": source_node,
5376       }
5377     env.update(_BuildInstanceHookEnvByObject(self, instance))
5378     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5379     nl_post = list(nl)
5380     nl_post.append(source_node)
5381     return env, nl, nl_post
5382
5383   def CheckPrereq(self):
5384     """Check prerequisites.
5385
5386     This checks that the instance is in the cluster.
5387
5388     """
5389     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5390     assert self.instance is not None, \
5391       "Cannot retrieve locked instance %s" % self.op.instance_name
5392
5393     bep = self.cfg.GetClusterInfo().FillBE(instance)
5394     if instance.disk_template not in constants.DTS_NET_MIRROR:
5395       raise errors.OpPrereqError("Instance's disk layout is not"
5396                                  " network mirrored, cannot failover.",
5397                                  errors.ECODE_STATE)
5398
5399     secondary_nodes = instance.secondary_nodes
5400     if not secondary_nodes:
5401       raise errors.ProgrammerError("no secondary node but using "
5402                                    "a mirrored disk template")
5403
5404     target_node = secondary_nodes[0]
5405     _CheckNodeOnline(self, target_node)
5406     _CheckNodeNotDrained(self, target_node)
5407     if instance.admin_up:
5408       # check memory requirements on the secondary node
5409       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5410                            instance.name, bep[constants.BE_MEMORY],
5411                            instance.hypervisor)
5412     else:
5413       self.LogInfo("Not checking memory on the secondary node as"
5414                    " instance will not be started")
5415
5416     # check bridge existance
5417     _CheckInstanceBridgesExist(self, instance, node=target_node)
5418
5419   def Exec(self, feedback_fn):
5420     """Failover an instance.
5421
5422     The failover is done by shutting it down on its present node and
5423     starting it on the secondary.
5424
5425     """
5426     instance = self.instance
5427
5428     source_node = instance.primary_node
5429     target_node = instance.secondary_nodes[0]
5430
5431     if instance.admin_up:
5432       feedback_fn("* checking disk consistency between source and target")
5433       for dev in instance.disks:
5434         # for drbd, these are drbd over lvm
5435         if not _CheckDiskConsistency(self, dev, target_node, False):
5436           if not self.op.ignore_consistency:
5437             raise errors.OpExecError("Disk %s is degraded on target node,"
5438                                      " aborting failover." % dev.iv_name)
5439     else:
5440       feedback_fn("* not checking disk consistency as instance is not running")
5441
5442     feedback_fn("* shutting down instance on source node")
5443     logging.info("Shutting down instance %s on node %s",
5444                  instance.name, source_node)
5445
5446     result = self.rpc.call_instance_shutdown(source_node, instance,
5447                                              self.op.shutdown_timeout)
5448     msg = result.fail_msg
5449     if msg:
5450       if self.op.ignore_consistency:
5451         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5452                              " Proceeding anyway. Please make sure node"
5453                              " %s is down. Error details: %s",
5454                              instance.name, source_node, source_node, msg)
5455       else:
5456         raise errors.OpExecError("Could not shutdown instance %s on"
5457                                  " node %s: %s" %
5458                                  (instance.name, source_node, msg))
5459
5460     feedback_fn("* deactivating the instance's disks on source node")
5461     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5462       raise errors.OpExecError("Can't shut down the instance's disks.")
5463
5464     instance.primary_node = target_node
5465     # distribute new instance config to the other nodes
5466     self.cfg.Update(instance, feedback_fn)
5467
5468     # Only start the instance if it's marked as up
5469     if instance.admin_up:
5470       feedback_fn("* activating the instance's disks on target node")
5471       logging.info("Starting instance %s on node %s",
5472                    instance.name, target_node)
5473
5474       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5475                                            ignore_secondaries=True)
5476       if not disks_ok:
5477         _ShutdownInstanceDisks(self, instance)
5478         raise errors.OpExecError("Can't activate the instance's disks")
5479
5480       feedback_fn("* starting the instance on the target node")
5481       result = self.rpc.call_instance_start(target_node, instance, None, None)
5482       msg = result.fail_msg
5483       if msg:
5484         _ShutdownInstanceDisks(self, instance)
5485         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5486                                  (instance.name, target_node, msg))
5487
5488
5489 class LUMigrateInstance(LogicalUnit):
5490   """Migrate an instance.
5491
5492   This is migration without shutting down, compared to the failover,
5493   which is done with shutdown.
5494
5495   """
5496   HPATH = "instance-migrate"
5497   HTYPE = constants.HTYPE_INSTANCE
5498   _OP_PARAMS = [
5499     _PInstanceName,
5500     _PMigrationMode,
5501     _PMigrationLive,
5502     ("cleanup", False, _TBool),
5503     ]
5504
5505   REQ_BGL = False
5506
5507   def ExpandNames(self):
5508     self._ExpandAndLockInstance()
5509
5510     self.needed_locks[locking.LEVEL_NODE] = []
5511     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5512
5513     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5514                                        self.op.cleanup)
5515     self.tasklets = [self._migrater]
5516
5517   def DeclareLocks(self, level):
5518     if level == locking.LEVEL_NODE:
5519       self._LockInstancesNodes()
5520
5521   def BuildHooksEnv(self):
5522     """Build hooks env.
5523
5524     This runs on master, primary and secondary nodes of the instance.
5525
5526     """
5527     instance = self._migrater.instance
5528     source_node = instance.primary_node
5529     target_node = instance.secondary_nodes[0]
5530     env = _BuildInstanceHookEnvByObject(self, instance)
5531     env["MIGRATE_LIVE"] = self._migrater.live
5532     env["MIGRATE_CLEANUP"] = self.op.cleanup
5533     env.update({
5534         "OLD_PRIMARY": source_node,
5535         "OLD_SECONDARY": target_node,
5536         "NEW_PRIMARY": target_node,
5537         "NEW_SECONDARY": source_node,
5538         })
5539     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5540     nl_post = list(nl)
5541     nl_post.append(source_node)
5542     return env, nl, nl_post
5543
5544
5545 class LUMoveInstance(LogicalUnit):
5546   """Move an instance by data-copying.
5547
5548   """
5549   HPATH = "instance-move"
5550   HTYPE = constants.HTYPE_INSTANCE
5551   _OP_PARAMS = [
5552     _PInstanceName,
5553     ("target_node", _NoDefault, _TNonEmptyString),
5554     _PShutdownTimeout,
5555     ]
5556   REQ_BGL = False
5557
5558   def ExpandNames(self):
5559     self._ExpandAndLockInstance()
5560     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5561     self.op.target_node = target_node
5562     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5563     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5564
5565   def DeclareLocks(self, level):
5566     if level == locking.LEVEL_NODE:
5567       self._LockInstancesNodes(primary_only=True)
5568
5569   def BuildHooksEnv(self):
5570     """Build hooks env.
5571
5572     This runs on master, primary and secondary nodes of the instance.
5573
5574     """
5575     env = {
5576       "TARGET_NODE": self.op.target_node,
5577       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5578       }
5579     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5580     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5581                                        self.op.target_node]
5582     return env, nl, nl
5583
5584   def CheckPrereq(self):
5585     """Check prerequisites.
5586
5587     This checks that the instance is in the cluster.
5588
5589     """
5590     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5591     assert self.instance is not None, \
5592       "Cannot retrieve locked instance %s" % self.op.instance_name
5593
5594     node = self.cfg.GetNodeInfo(self.op.target_node)
5595     assert node is not None, \
5596       "Cannot retrieve locked node %s" % self.op.target_node
5597
5598     self.target_node = target_node = node.name
5599
5600     if target_node == instance.primary_node:
5601       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5602                                  (instance.name, target_node),
5603                                  errors.ECODE_STATE)
5604
5605     bep = self.cfg.GetClusterInfo().FillBE(instance)
5606
5607     for idx, dsk in enumerate(instance.disks):
5608       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5609         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5610                                    " cannot copy" % idx, errors.ECODE_STATE)
5611
5612     _CheckNodeOnline(self, target_node)
5613     _CheckNodeNotDrained(self, target_node)
5614
5615     if instance.admin_up:
5616       # check memory requirements on the secondary node
5617       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5618                            instance.name, bep[constants.BE_MEMORY],
5619                            instance.hypervisor)
5620     else:
5621       self.LogInfo("Not checking memory on the secondary node as"
5622                    " instance will not be started")
5623
5624     # check bridge existance
5625     _CheckInstanceBridgesExist(self, instance, node=target_node)
5626
5627   def Exec(self, feedback_fn):
5628     """Move an instance.
5629
5630     The move is done by shutting it down on its present node, copying
5631     the data over (slow) and starting it on the new node.
5632
5633     """
5634     instance = self.instance
5635
5636     source_node = instance.primary_node
5637     target_node = self.target_node
5638
5639     self.LogInfo("Shutting down instance %s on source node %s",
5640                  instance.name, source_node)
5641
5642     result = self.rpc.call_instance_shutdown(source_node, instance,
5643                                              self.op.shutdown_timeout)
5644     msg = result.fail_msg
5645     if msg:
5646       if self.op.ignore_consistency:
5647         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5648                              " Proceeding anyway. Please make sure node"
5649                              " %s is down. Error details: %s",
5650                              instance.name, source_node, source_node, msg)
5651       else:
5652         raise errors.OpExecError("Could not shutdown instance %s on"
5653                                  " node %s: %s" %
5654                                  (instance.name, source_node, msg))
5655
5656     # create the target disks
5657     try:
5658       _CreateDisks(self, instance, target_node=target_node)
5659     except errors.OpExecError:
5660       self.LogWarning("Device creation failed, reverting...")
5661       try:
5662         _RemoveDisks(self, instance, target_node=target_node)
5663       finally:
5664         self.cfg.ReleaseDRBDMinors(instance.name)
5665         raise
5666
5667     cluster_name = self.cfg.GetClusterInfo().cluster_name
5668
5669     errs = []
5670     # activate, get path, copy the data over
5671     for idx, disk in enumerate(instance.disks):
5672       self.LogInfo("Copying data for disk %d", idx)
5673       result = self.rpc.call_blockdev_assemble(target_node, disk,
5674                                                instance.name, True)
5675       if result.fail_msg:
5676         self.LogWarning("Can't assemble newly created disk %d: %s",
5677                         idx, result.fail_msg)
5678         errs.append(result.fail_msg)
5679         break
5680       dev_path = result.payload
5681       result = self.rpc.call_blockdev_export(source_node, disk,
5682                                              target_node, dev_path,
5683                                              cluster_name)
5684       if result.fail_msg:
5685         self.LogWarning("Can't copy data over for disk %d: %s",
5686                         idx, result.fail_msg)
5687         errs.append(result.fail_msg)
5688         break
5689
5690     if errs:
5691       self.LogWarning("Some disks failed to copy, aborting")
5692       try:
5693         _RemoveDisks(self, instance, target_node=target_node)
5694       finally:
5695         self.cfg.ReleaseDRBDMinors(instance.name)
5696         raise errors.OpExecError("Errors during disk copy: %s" %
5697                                  (",".join(errs),))
5698
5699     instance.primary_node = target_node
5700     self.cfg.Update(instance, feedback_fn)
5701
5702     self.LogInfo("Removing the disks on the original node")
5703     _RemoveDisks(self, instance, target_node=source_node)
5704
5705     # Only start the instance if it's marked as up
5706     if instance.admin_up:
5707       self.LogInfo("Starting instance %s on node %s",
5708                    instance.name, target_node)
5709
5710       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5711                                            ignore_secondaries=True)
5712       if not disks_ok:
5713         _ShutdownInstanceDisks(self, instance)
5714         raise errors.OpExecError("Can't activate the instance's disks")
5715
5716       result = self.rpc.call_instance_start(target_node, instance, None, None)
5717       msg = result.fail_msg
5718       if msg:
5719         _ShutdownInstanceDisks(self, instance)
5720         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5721                                  (instance.name, target_node, msg))
5722
5723
5724 class LUMigrateNode(LogicalUnit):
5725   """Migrate all instances from a node.
5726
5727   """
5728   HPATH = "node-migrate"
5729   HTYPE = constants.HTYPE_NODE
5730   _OP_PARAMS = [
5731     _PNodeName,
5732     _PMigrationMode,
5733     _PMigrationLive,
5734     ]
5735   REQ_BGL = False
5736
5737   def ExpandNames(self):
5738     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5739
5740     self.needed_locks = {
5741       locking.LEVEL_NODE: [self.op.node_name],
5742       }
5743
5744     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5745
5746     # Create tasklets for migrating instances for all instances on this node
5747     names = []
5748     tasklets = []
5749
5750     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5751       logging.debug("Migrating instance %s", inst.name)
5752       names.append(inst.name)
5753
5754       tasklets.append(TLMigrateInstance(self, inst.name, False))
5755
5756     self.tasklets = tasklets
5757
5758     # Declare instance locks
5759     self.needed_locks[locking.LEVEL_INSTANCE] = names
5760
5761   def DeclareLocks(self, level):
5762     if level == locking.LEVEL_NODE:
5763       self._LockInstancesNodes()
5764
5765   def BuildHooksEnv(self):
5766     """Build hooks env.
5767
5768     This runs on the master, the primary and all the secondaries.
5769
5770     """
5771     env = {
5772       "NODE_NAME": self.op.node_name,
5773       }
5774
5775     nl = [self.cfg.GetMasterNode()]
5776
5777     return (env, nl, nl)
5778
5779
5780 class TLMigrateInstance(Tasklet):
5781   """Tasklet class for instance migration.
5782
5783   @type live: boolean
5784   @ivar live: whether the migration will be done live or non-live;
5785       this variable is initalized only after CheckPrereq has run
5786
5787   """
5788   def __init__(self, lu, instance_name, cleanup):
5789     """Initializes this class.
5790
5791     """
5792     Tasklet.__init__(self, lu)
5793
5794     # Parameters
5795     self.instance_name = instance_name
5796     self.cleanup = cleanup
5797     self.live = False # will be overridden later
5798
5799   def CheckPrereq(self):
5800     """Check prerequisites.
5801
5802     This checks that the instance is in the cluster.
5803
5804     """
5805     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5806     instance = self.cfg.GetInstanceInfo(instance_name)
5807     assert instance is not None
5808
5809     if instance.disk_template != constants.DT_DRBD8:
5810       raise errors.OpPrereqError("Instance's disk layout is not"
5811                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5812
5813     secondary_nodes = instance.secondary_nodes
5814     if not secondary_nodes:
5815       raise errors.ConfigurationError("No secondary node but using"
5816                                       " drbd8 disk template")
5817
5818     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5819
5820     target_node = secondary_nodes[0]
5821     # check memory requirements on the secondary node
5822     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5823                          instance.name, i_be[constants.BE_MEMORY],
5824                          instance.hypervisor)
5825
5826     # check bridge existance
5827     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5828
5829     if not self.cleanup:
5830       _CheckNodeNotDrained(self.lu, target_node)
5831       result = self.rpc.call_instance_migratable(instance.primary_node,
5832                                                  instance)
5833       result.Raise("Can't migrate, please use failover",
5834                    prereq=True, ecode=errors.ECODE_STATE)
5835
5836     self.instance = instance
5837
5838     if self.lu.op.live is not None and self.lu.op.mode is not None:
5839       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5840                                  " parameters are accepted",
5841                                  errors.ECODE_INVAL)
5842     if self.lu.op.live is not None:
5843       if self.lu.op.live:
5844         self.lu.op.mode = constants.HT_MIGRATION_LIVE
5845       else:
5846         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5847       # reset the 'live' parameter to None so that repeated
5848       # invocations of CheckPrereq do not raise an exception
5849       self.lu.op.live = None
5850     elif self.lu.op.mode is None:
5851       # read the default value from the hypervisor
5852       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5853       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5854
5855     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5856
5857   def _WaitUntilSync(self):
5858     """Poll with custom rpc for disk sync.
5859
5860     This uses our own step-based rpc call.
5861
5862     """
5863     self.feedback_fn("* wait until resync is done")
5864     all_done = False
5865     while not all_done:
5866       all_done = True
5867       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5868                                             self.nodes_ip,
5869                                             self.instance.disks)
5870       min_percent = 100
5871       for node, nres in result.items():
5872         nres.Raise("Cannot resync disks on node %s" % node)
5873         node_done, node_percent = nres.payload
5874         all_done = all_done and node_done
5875         if node_percent is not None:
5876           min_percent = min(min_percent, node_percent)
5877       if not all_done:
5878         if min_percent < 100:
5879           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5880         time.sleep(2)
5881
5882   def _EnsureSecondary(self, node):
5883     """Demote a node to secondary.
5884
5885     """
5886     self.feedback_fn("* switching node %s to secondary mode" % node)
5887
5888     for dev in self.instance.disks:
5889       self.cfg.SetDiskID(dev, node)
5890
5891     result = self.rpc.call_blockdev_close(node, self.instance.name,
5892                                           self.instance.disks)
5893     result.Raise("Cannot change disk to secondary on node %s" % node)
5894
5895   def _GoStandalone(self):
5896     """Disconnect from the network.
5897
5898     """
5899     self.feedback_fn("* changing into standalone mode")
5900     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5901                                                self.instance.disks)
5902     for node, nres in result.items():
5903       nres.Raise("Cannot disconnect disks node %s" % node)
5904
5905   def _GoReconnect(self, multimaster):
5906     """Reconnect to the network.
5907
5908     """
5909     if multimaster:
5910       msg = "dual-master"
5911     else:
5912       msg = "single-master"
5913     self.feedback_fn("* changing disks into %s mode" % msg)
5914     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5915                                            self.instance.disks,
5916                                            self.instance.name, multimaster)
5917     for node, nres in result.items():
5918       nres.Raise("Cannot change disks config on node %s" % node)
5919
5920   def _ExecCleanup(self):
5921     """Try to cleanup after a failed migration.
5922
5923     The cleanup is done by:
5924       - check that the instance is running only on one node
5925         (and update the config if needed)
5926       - change disks on its secondary node to secondary
5927       - wait until disks are fully synchronized
5928       - disconnect from the network
5929       - change disks into single-master mode
5930       - wait again until disks are fully synchronized
5931
5932     """
5933     instance = self.instance
5934     target_node = self.target_node
5935     source_node = self.source_node
5936
5937     # check running on only one node
5938     self.feedback_fn("* checking where the instance actually runs"
5939                      " (if this hangs, the hypervisor might be in"
5940                      " a bad state)")
5941     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5942     for node, result in ins_l.items():
5943       result.Raise("Can't contact node %s" % node)
5944
5945     runningon_source = instance.name in ins_l[source_node].payload
5946     runningon_target = instance.name in ins_l[target_node].payload
5947
5948     if runningon_source and runningon_target:
5949       raise errors.OpExecError("Instance seems to be running on two nodes,"
5950                                " or the hypervisor is confused. You will have"
5951                                " to ensure manually that it runs only on one"
5952                                " and restart this operation.")
5953
5954     if not (runningon_source or runningon_target):
5955       raise errors.OpExecError("Instance does not seem to be running at all."
5956                                " In this case, it's safer to repair by"
5957                                " running 'gnt-instance stop' to ensure disk"
5958                                " shutdown, and then restarting it.")
5959
5960     if runningon_target:
5961       # the migration has actually succeeded, we need to update the config
5962       self.feedback_fn("* instance running on secondary node (%s),"
5963                        " updating config" % target_node)
5964       instance.primary_node = target_node
5965       self.cfg.Update(instance, self.feedback_fn)
5966       demoted_node = source_node
5967     else:
5968       self.feedback_fn("* instance confirmed to be running on its"
5969                        " primary node (%s)" % source_node)
5970       demoted_node = target_node
5971
5972     self._EnsureSecondary(demoted_node)
5973     try:
5974       self._WaitUntilSync()
5975     except errors.OpExecError:
5976       # we ignore here errors, since if the device is standalone, it
5977       # won't be able to sync
5978       pass
5979     self._GoStandalone()
5980     self._GoReconnect(False)
5981     self._WaitUntilSync()
5982
5983     self.feedback_fn("* done")
5984
5985   def _RevertDiskStatus(self):
5986     """Try to revert the disk status after a failed migration.
5987
5988     """
5989     target_node = self.target_node
5990     try:
5991       self._EnsureSecondary(target_node)
5992       self._GoStandalone()
5993       self._GoReconnect(False)
5994       self._WaitUntilSync()
5995     except errors.OpExecError, err:
5996       self.lu.LogWarning("Migration failed and I can't reconnect the"
5997                          " drives: error '%s'\n"
5998                          "Please look and recover the instance status" %
5999                          str(err))
6000
6001   def _AbortMigration(self):
6002     """Call the hypervisor code to abort a started migration.
6003
6004     """
6005     instance = self.instance
6006     target_node = self.target_node
6007     migration_info = self.migration_info
6008
6009     abort_result = self.rpc.call_finalize_migration(target_node,
6010                                                     instance,
6011                                                     migration_info,
6012                                                     False)
6013     abort_msg = abort_result.fail_msg
6014     if abort_msg:
6015       logging.error("Aborting migration failed on target node %s: %s",
6016                     target_node, abort_msg)
6017       # Don't raise an exception here, as we stil have to try to revert the
6018       # disk status, even if this step failed.
6019
6020   def _ExecMigration(self):
6021     """Migrate an instance.
6022
6023     The migrate is done by:
6024       - change the disks into dual-master mode
6025       - wait until disks are fully synchronized again
6026       - migrate the instance
6027       - change disks on the new secondary node (the old primary) to secondary
6028       - wait until disks are fully synchronized
6029       - change disks into single-master mode
6030
6031     """
6032     instance = self.instance
6033     target_node = self.target_node
6034     source_node = self.source_node
6035
6036     self.feedback_fn("* checking disk consistency between source and target")
6037     for dev in instance.disks:
6038       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6039         raise errors.OpExecError("Disk %s is degraded or not fully"
6040                                  " synchronized on target node,"
6041                                  " aborting migrate." % dev.iv_name)
6042
6043     # First get the migration information from the remote node
6044     result = self.rpc.call_migration_info(source_node, instance)
6045     msg = result.fail_msg
6046     if msg:
6047       log_err = ("Failed fetching source migration information from %s: %s" %
6048                  (source_node, msg))
6049       logging.error(log_err)
6050       raise errors.OpExecError(log_err)
6051
6052     self.migration_info = migration_info = result.payload
6053
6054     # Then switch the disks to master/master mode
6055     self._EnsureSecondary(target_node)
6056     self._GoStandalone()
6057     self._GoReconnect(True)
6058     self._WaitUntilSync()
6059
6060     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6061     result = self.rpc.call_accept_instance(target_node,
6062                                            instance,
6063                                            migration_info,
6064                                            self.nodes_ip[target_node])
6065
6066     msg = result.fail_msg
6067     if msg:
6068       logging.error("Instance pre-migration failed, trying to revert"
6069                     " disk status: %s", msg)
6070       self.feedback_fn("Pre-migration failed, aborting")
6071       self._AbortMigration()
6072       self._RevertDiskStatus()
6073       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6074                                (instance.name, msg))
6075
6076     self.feedback_fn("* migrating instance to %s" % target_node)
6077     time.sleep(10)
6078     result = self.rpc.call_instance_migrate(source_node, instance,
6079                                             self.nodes_ip[target_node],
6080                                             self.live)
6081     msg = result.fail_msg
6082     if msg:
6083       logging.error("Instance migration failed, trying to revert"
6084                     " disk status: %s", msg)
6085       self.feedback_fn("Migration failed, aborting")
6086       self._AbortMigration()
6087       self._RevertDiskStatus()
6088       raise errors.OpExecError("Could not migrate instance %s: %s" %
6089                                (instance.name, msg))
6090     time.sleep(10)
6091
6092     instance.primary_node = target_node
6093     # distribute new instance config to the other nodes
6094     self.cfg.Update(instance, self.feedback_fn)
6095
6096     result = self.rpc.call_finalize_migration(target_node,
6097                                               instance,
6098                                               migration_info,
6099                                               True)
6100     msg = result.fail_msg
6101     if msg:
6102       logging.error("Instance migration succeeded, but finalization failed:"
6103                     " %s", msg)
6104       raise errors.OpExecError("Could not finalize instance migration: %s" %
6105                                msg)
6106
6107     self._EnsureSecondary(source_node)
6108     self._WaitUntilSync()
6109     self._GoStandalone()
6110     self._GoReconnect(False)
6111     self._WaitUntilSync()
6112
6113     self.feedback_fn("* done")
6114
6115   def Exec(self, feedback_fn):
6116     """Perform the migration.
6117
6118     """
6119     feedback_fn("Migrating instance %s" % self.instance.name)
6120
6121     self.feedback_fn = feedback_fn
6122
6123     self.source_node = self.instance.primary_node
6124     self.target_node = self.instance.secondary_nodes[0]
6125     self.all_nodes = [self.source_node, self.target_node]
6126     self.nodes_ip = {
6127       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6128       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6129       }
6130
6131     if self.cleanup:
6132       return self._ExecCleanup()
6133     else:
6134       return self._ExecMigration()
6135
6136
6137 def _CreateBlockDev(lu, node, instance, device, force_create,
6138                     info, force_open):
6139   """Create a tree of block devices on a given node.
6140
6141   If this device type has to be created on secondaries, create it and
6142   all its children.
6143
6144   If not, just recurse to children keeping the same 'force' value.
6145
6146   @param lu: the lu on whose behalf we execute
6147   @param node: the node on which to create the device
6148   @type instance: L{objects.Instance}
6149   @param instance: the instance which owns the device
6150   @type device: L{objects.Disk}
6151   @param device: the device to create
6152   @type force_create: boolean
6153   @param force_create: whether to force creation of this device; this
6154       will be change to True whenever we find a device which has
6155       CreateOnSecondary() attribute
6156   @param info: the extra 'metadata' we should attach to the device
6157       (this will be represented as a LVM tag)
6158   @type force_open: boolean
6159   @param force_open: this parameter will be passes to the
6160       L{backend.BlockdevCreate} function where it specifies
6161       whether we run on primary or not, and it affects both
6162       the child assembly and the device own Open() execution
6163
6164   """
6165   if device.CreateOnSecondary():
6166     force_create = True
6167
6168   if device.children:
6169     for child in device.children:
6170       _CreateBlockDev(lu, node, instance, child, force_create,
6171                       info, force_open)
6172
6173   if not force_create:
6174     return
6175
6176   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6177
6178
6179 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6180   """Create a single block device on a given node.
6181
6182   This will not recurse over children of the device, so they must be
6183   created in advance.
6184
6185   @param lu: the lu on whose behalf we execute
6186   @param node: the node on which to create the device
6187   @type instance: L{objects.Instance}
6188   @param instance: the instance which owns the device
6189   @type device: L{objects.Disk}
6190   @param device: the device to create
6191   @param info: the extra 'metadata' we should attach to the device
6192       (this will be represented as a LVM tag)
6193   @type force_open: boolean
6194   @param force_open: this parameter will be passes to the
6195       L{backend.BlockdevCreate} function where it specifies
6196       whether we run on primary or not, and it affects both
6197       the child assembly and the device own Open() execution
6198
6199   """
6200   lu.cfg.SetDiskID(device, node)
6201   result = lu.rpc.call_blockdev_create(node, device, device.size,
6202                                        instance.name, force_open, info)
6203   result.Raise("Can't create block device %s on"
6204                " node %s for instance %s" % (device, node, instance.name))
6205   if device.physical_id is None:
6206     device.physical_id = result.payload
6207
6208
6209 def _GenerateUniqueNames(lu, exts):
6210   """Generate a suitable LV name.
6211
6212   This will generate a logical volume name for the given instance.
6213
6214   """
6215   results = []
6216   for val in exts:
6217     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6218     results.append("%s%s" % (new_id, val))
6219   return results
6220
6221
6222 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6223                          p_minor, s_minor):
6224   """Generate a drbd8 device complete with its children.
6225
6226   """
6227   port = lu.cfg.AllocatePort()
6228   vgname = lu.cfg.GetVGName()
6229   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6230   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6231                           logical_id=(vgname, names[0]))
6232   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6233                           logical_id=(vgname, names[1]))
6234   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6235                           logical_id=(primary, secondary, port,
6236                                       p_minor, s_minor,
6237                                       shared_secret),
6238                           children=[dev_data, dev_meta],
6239                           iv_name=iv_name)
6240   return drbd_dev
6241
6242
6243 def _GenerateDiskTemplate(lu, template_name,
6244                           instance_name, primary_node,
6245                           secondary_nodes, disk_info,
6246                           file_storage_dir, file_driver,
6247                           base_index):
6248   """Generate the entire disk layout for a given template type.
6249
6250   """
6251   #TODO: compute space requirements
6252
6253   vgname = lu.cfg.GetVGName()
6254   disk_count = len(disk_info)
6255   disks = []
6256   if template_name == constants.DT_DISKLESS:
6257     pass
6258   elif template_name == constants.DT_PLAIN:
6259     if len(secondary_nodes) != 0:
6260       raise errors.ProgrammerError("Wrong template configuration")
6261
6262     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6263                                       for i in range(disk_count)])
6264     for idx, disk in enumerate(disk_info):
6265       disk_index = idx + base_index
6266       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6267                               logical_id=(vgname, names[idx]),
6268                               iv_name="disk/%d" % disk_index,
6269                               mode=disk["mode"])
6270       disks.append(disk_dev)
6271   elif template_name == constants.DT_DRBD8:
6272     if len(secondary_nodes) != 1:
6273       raise errors.ProgrammerError("Wrong template configuration")
6274     remote_node = secondary_nodes[0]
6275     minors = lu.cfg.AllocateDRBDMinor(
6276       [primary_node, remote_node] * len(disk_info), instance_name)
6277
6278     names = []
6279     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6280                                                for i in range(disk_count)]):
6281       names.append(lv_prefix + "_data")
6282       names.append(lv_prefix + "_meta")
6283     for idx, disk in enumerate(disk_info):
6284       disk_index = idx + base_index
6285       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6286                                       disk["size"], names[idx*2:idx*2+2],
6287                                       "disk/%d" % disk_index,
6288                                       minors[idx*2], minors[idx*2+1])
6289       disk_dev.mode = disk["mode"]
6290       disks.append(disk_dev)
6291   elif template_name == constants.DT_FILE:
6292     if len(secondary_nodes) != 0:
6293       raise errors.ProgrammerError("Wrong template configuration")
6294
6295     _RequireFileStorage()
6296
6297     for idx, disk in enumerate(disk_info):
6298       disk_index = idx + base_index
6299       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6300                               iv_name="disk/%d" % disk_index,
6301                               logical_id=(file_driver,
6302                                           "%s/disk%d" % (file_storage_dir,
6303                                                          disk_index)),
6304                               mode=disk["mode"])
6305       disks.append(disk_dev)
6306   else:
6307     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6308   return disks
6309
6310
6311 def _GetInstanceInfoText(instance):
6312   """Compute that text that should be added to the disk's metadata.
6313
6314   """
6315   return "originstname+%s" % instance.name
6316
6317
6318 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6319   """Create all disks for an instance.
6320
6321   This abstracts away some work from AddInstance.
6322
6323   @type lu: L{LogicalUnit}
6324   @param lu: the logical unit on whose behalf we execute
6325   @type instance: L{objects.Instance}
6326   @param instance: the instance whose disks we should create
6327   @type to_skip: list
6328   @param to_skip: list of indices to skip
6329   @type target_node: string
6330   @param target_node: if passed, overrides the target node for creation
6331   @rtype: boolean
6332   @return: the success of the creation
6333
6334   """
6335   info = _GetInstanceInfoText(instance)
6336   if target_node is None:
6337     pnode = instance.primary_node
6338     all_nodes = instance.all_nodes
6339   else:
6340     pnode = target_node
6341     all_nodes = [pnode]
6342
6343   if instance.disk_template == constants.DT_FILE:
6344     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6345     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6346
6347     result.Raise("Failed to create directory '%s' on"
6348                  " node %s" % (file_storage_dir, pnode))
6349
6350   # Note: this needs to be kept in sync with adding of disks in
6351   # LUSetInstanceParams
6352   for idx, device in enumerate(instance.disks):
6353     if to_skip and idx in to_skip:
6354       continue
6355     logging.info("Creating volume %s for instance %s",
6356                  device.iv_name, instance.name)
6357     #HARDCODE
6358     for node in all_nodes:
6359       f_create = node == pnode
6360       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6361
6362
6363 def _RemoveDisks(lu, instance, target_node=None):
6364   """Remove all disks for an instance.
6365
6366   This abstracts away some work from `AddInstance()` and
6367   `RemoveInstance()`. Note that in case some of the devices couldn't
6368   be removed, the removal will continue with the other ones (compare
6369   with `_CreateDisks()`).
6370
6371   @type lu: L{LogicalUnit}
6372   @param lu: the logical unit on whose behalf we execute
6373   @type instance: L{objects.Instance}
6374   @param instance: the instance whose disks we should remove
6375   @type target_node: string
6376   @param target_node: used to override the node on which to remove the disks
6377   @rtype: boolean
6378   @return: the success of the removal
6379
6380   """
6381   logging.info("Removing block devices for instance %s", instance.name)
6382
6383   all_result = True
6384   for device in instance.disks:
6385     if target_node:
6386       edata = [(target_node, device)]
6387     else:
6388       edata = device.ComputeNodeTree(instance.primary_node)
6389     for node, disk in edata:
6390       lu.cfg.SetDiskID(disk, node)
6391       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6392       if msg:
6393         lu.LogWarning("Could not remove block device %s on node %s,"
6394                       " continuing anyway: %s", device.iv_name, node, msg)
6395         all_result = False
6396
6397   if instance.disk_template == constants.DT_FILE:
6398     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6399     if target_node:
6400       tgt = target_node
6401     else:
6402       tgt = instance.primary_node
6403     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6404     if result.fail_msg:
6405       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6406                     file_storage_dir, instance.primary_node, result.fail_msg)
6407       all_result = False
6408
6409   return all_result
6410
6411
6412 def _ComputeDiskSize(disk_template, disks):
6413   """Compute disk size requirements in the volume group
6414
6415   """
6416   # Required free disk space as a function of disk and swap space
6417   req_size_dict = {
6418     constants.DT_DISKLESS: None,
6419     constants.DT_PLAIN: sum(d["size"] for d in disks),
6420     # 128 MB are added for drbd metadata for each disk
6421     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6422     constants.DT_FILE: None,
6423   }
6424
6425   if disk_template not in req_size_dict:
6426     raise errors.ProgrammerError("Disk template '%s' size requirement"
6427                                  " is unknown" %  disk_template)
6428
6429   return req_size_dict[disk_template]
6430
6431
6432 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6433   """Hypervisor parameter validation.
6434
6435   This function abstract the hypervisor parameter validation to be
6436   used in both instance create and instance modify.
6437
6438   @type lu: L{LogicalUnit}
6439   @param lu: the logical unit for which we check
6440   @type nodenames: list
6441   @param nodenames: the list of nodes on which we should check
6442   @type hvname: string
6443   @param hvname: the name of the hypervisor we should use
6444   @type hvparams: dict
6445   @param hvparams: the parameters which we need to check
6446   @raise errors.OpPrereqError: if the parameters are not valid
6447
6448   """
6449   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6450                                                   hvname,
6451                                                   hvparams)
6452   for node in nodenames:
6453     info = hvinfo[node]
6454     if info.offline:
6455       continue
6456     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6457
6458
6459 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6460   """OS parameters validation.
6461
6462   @type lu: L{LogicalUnit}
6463   @param lu: the logical unit for which we check
6464   @type required: boolean
6465   @param required: whether the validation should fail if the OS is not
6466       found
6467   @type nodenames: list
6468   @param nodenames: the list of nodes on which we should check
6469   @type osname: string
6470   @param osname: the name of the hypervisor we should use
6471   @type osparams: dict
6472   @param osparams: the parameters which we need to check
6473   @raise errors.OpPrereqError: if the parameters are not valid
6474
6475   """
6476   result = lu.rpc.call_os_validate(required, nodenames, osname,
6477                                    [constants.OS_VALIDATE_PARAMETERS],
6478                                    osparams)
6479   for node, nres in result.items():
6480     # we don't check for offline cases since this should be run only
6481     # against the master node and/or an instance's nodes
6482     nres.Raise("OS Parameters validation failed on node %s" % node)
6483     if not nres.payload:
6484       lu.LogInfo("OS %s not found on node %s, validation skipped",
6485                  osname, node)
6486
6487
6488 class LUCreateInstance(LogicalUnit):
6489   """Create an instance.
6490
6491   """
6492   HPATH = "instance-add"
6493   HTYPE = constants.HTYPE_INSTANCE
6494   _OP_PARAMS = [
6495     _PInstanceName,
6496     ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6497     ("start", True, _TBool),
6498     ("wait_for_sync", True, _TBool),
6499     ("ip_check", True, _TBool),
6500     ("name_check", True, _TBool),
6501     ("disks", _NoDefault, _TListOf(_TDict)),
6502     ("nics", _NoDefault, _TListOf(_TDict)),
6503     ("hvparams", _EmptyDict, _TDict),
6504     ("beparams", _EmptyDict, _TDict),
6505     ("osparams", _EmptyDict, _TDict),
6506     ("no_install", None, _TMaybeBool),
6507     ("os_type", None, _TMaybeString),
6508     ("force_variant", False, _TBool),
6509     ("source_handshake", None, _TOr(_TList, _TNone)),
6510     ("source_x509_ca", None, _TMaybeString),
6511     ("source_instance_name", None, _TMaybeString),
6512     ("src_node", None, _TMaybeString),
6513     ("src_path", None, _TMaybeString),
6514     ("pnode", None, _TMaybeString),
6515     ("snode", None, _TMaybeString),
6516     ("iallocator", None, _TMaybeString),
6517     ("hypervisor", None, _TMaybeString),
6518     ("disk_template", _NoDefault, _CheckDiskTemplate),
6519     ("identify_defaults", False, _TBool),
6520     ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6521     ("file_storage_dir", None, _TMaybeString),
6522     ("dry_run", False, _TBool),
6523     ]
6524   REQ_BGL = False
6525
6526   def CheckArguments(self):
6527     """Check arguments.
6528
6529     """
6530     # do not require name_check to ease forward/backward compatibility
6531     # for tools
6532     if self.op.no_install and self.op.start:
6533       self.LogInfo("No-installation mode selected, disabling startup")
6534       self.op.start = False
6535     # validate/normalize the instance name
6536     self.op.instance_name = \
6537       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6538
6539     if self.op.ip_check and not self.op.name_check:
6540       # TODO: make the ip check more flexible and not depend on the name check
6541       raise errors.OpPrereqError("Cannot do ip check without a name check",
6542                                  errors.ECODE_INVAL)
6543
6544     # check nics' parameter names
6545     for nic in self.op.nics:
6546       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6547
6548     # check disks. parameter names and consistent adopt/no-adopt strategy
6549     has_adopt = has_no_adopt = False
6550     for disk in self.op.disks:
6551       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6552       if "adopt" in disk:
6553         has_adopt = True
6554       else:
6555         has_no_adopt = True
6556     if has_adopt and has_no_adopt:
6557       raise errors.OpPrereqError("Either all disks are adopted or none is",
6558                                  errors.ECODE_INVAL)
6559     if has_adopt:
6560       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6561         raise errors.OpPrereqError("Disk adoption is not supported for the"
6562                                    " '%s' disk template" %
6563                                    self.op.disk_template,
6564                                    errors.ECODE_INVAL)
6565       if self.op.iallocator is not None:
6566         raise errors.OpPrereqError("Disk adoption not allowed with an"
6567                                    " iallocator script", errors.ECODE_INVAL)
6568       if self.op.mode == constants.INSTANCE_IMPORT:
6569         raise errors.OpPrereqError("Disk adoption not allowed for"
6570                                    " instance import", errors.ECODE_INVAL)
6571
6572     self.adopt_disks = has_adopt
6573
6574     # instance name verification
6575     if self.op.name_check:
6576       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6577       self.op.instance_name = self.hostname1.name
6578       # used in CheckPrereq for ip ping check
6579       self.check_ip = self.hostname1.ip
6580     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6581       raise errors.OpPrereqError("Remote imports require names to be checked" %
6582                                  errors.ECODE_INVAL)
6583     else:
6584       self.check_ip = None
6585
6586     # file storage checks
6587     if (self.op.file_driver and
6588         not self.op.file_driver in constants.FILE_DRIVER):
6589       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6590                                  self.op.file_driver, errors.ECODE_INVAL)
6591
6592     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6593       raise errors.OpPrereqError("File storage directory path not absolute",
6594                                  errors.ECODE_INVAL)
6595
6596     ### Node/iallocator related checks
6597     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6598
6599     self._cds = _GetClusterDomainSecret()
6600
6601     if self.op.mode == constants.INSTANCE_IMPORT:
6602       # On import force_variant must be True, because if we forced it at
6603       # initial install, our only chance when importing it back is that it
6604       # works again!
6605       self.op.force_variant = True
6606
6607       if self.op.no_install:
6608         self.LogInfo("No-installation mode has no effect during import")
6609
6610     elif self.op.mode == constants.INSTANCE_CREATE:
6611       if self.op.os_type is None:
6612         raise errors.OpPrereqError("No guest OS specified",
6613                                    errors.ECODE_INVAL)
6614       if self.op.disk_template is None:
6615         raise errors.OpPrereqError("No disk template specified",
6616                                    errors.ECODE_INVAL)
6617
6618     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6619       # Check handshake to ensure both clusters have the same domain secret
6620       src_handshake = self.op.source_handshake
6621       if not src_handshake:
6622         raise errors.OpPrereqError("Missing source handshake",
6623                                    errors.ECODE_INVAL)
6624
6625       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6626                                                            src_handshake)
6627       if errmsg:
6628         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6629                                    errors.ECODE_INVAL)
6630
6631       # Load and check source CA
6632       self.source_x509_ca_pem = self.op.source_x509_ca
6633       if not self.source_x509_ca_pem:
6634         raise errors.OpPrereqError("Missing source X509 CA",
6635                                    errors.ECODE_INVAL)
6636
6637       try:
6638         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6639                                                     self._cds)
6640       except OpenSSL.crypto.Error, err:
6641         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6642                                    (err, ), errors.ECODE_INVAL)
6643
6644       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6645       if errcode is not None:
6646         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6647                                    errors.ECODE_INVAL)
6648
6649       self.source_x509_ca = cert
6650
6651       src_instance_name = self.op.source_instance_name
6652       if not src_instance_name:
6653         raise errors.OpPrereqError("Missing source instance name",
6654                                    errors.ECODE_INVAL)
6655
6656       self.source_instance_name = \
6657           netutils.GetHostname(name=src_instance_name).name
6658
6659     else:
6660       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6661                                  self.op.mode, errors.ECODE_INVAL)
6662
6663   def ExpandNames(self):
6664     """ExpandNames for CreateInstance.
6665
6666     Figure out the right locks for instance creation.
6667
6668     """
6669     self.needed_locks = {}
6670
6671     instance_name = self.op.instance_name
6672     # this is just a preventive check, but someone might still add this
6673     # instance in the meantime, and creation will fail at lock-add time
6674     if instance_name in self.cfg.GetInstanceList():
6675       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6676                                  instance_name, errors.ECODE_EXISTS)
6677
6678     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6679
6680     if self.op.iallocator:
6681       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6682     else:
6683       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6684       nodelist = [self.op.pnode]
6685       if self.op.snode is not None:
6686         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6687         nodelist.append(self.op.snode)
6688       self.needed_locks[locking.LEVEL_NODE] = nodelist
6689
6690     # in case of import lock the source node too
6691     if self.op.mode == constants.INSTANCE_IMPORT:
6692       src_node = self.op.src_node
6693       src_path = self.op.src_path
6694
6695       if src_path is None:
6696         self.op.src_path = src_path = self.op.instance_name
6697
6698       if src_node is None:
6699         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6700         self.op.src_node = None
6701         if os.path.isabs(src_path):
6702           raise errors.OpPrereqError("Importing an instance from an absolute"
6703                                      " path requires a source node option.",
6704                                      errors.ECODE_INVAL)
6705       else:
6706         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6707         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6708           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6709         if not os.path.isabs(src_path):
6710           self.op.src_path = src_path = \
6711             utils.PathJoin(constants.EXPORT_DIR, src_path)
6712
6713   def _RunAllocator(self):
6714     """Run the allocator based on input opcode.
6715
6716     """
6717     nics = [n.ToDict() for n in self.nics]
6718     ial = IAllocator(self.cfg, self.rpc,
6719                      mode=constants.IALLOCATOR_MODE_ALLOC,
6720                      name=self.op.instance_name,
6721                      disk_template=self.op.disk_template,
6722                      tags=[],
6723                      os=self.op.os_type,
6724                      vcpus=self.be_full[constants.BE_VCPUS],
6725                      mem_size=self.be_full[constants.BE_MEMORY],
6726                      disks=self.disks,
6727                      nics=nics,
6728                      hypervisor=self.op.hypervisor,
6729                      )
6730
6731     ial.Run(self.op.iallocator)
6732
6733     if not ial.success:
6734       raise errors.OpPrereqError("Can't compute nodes using"
6735                                  " iallocator '%s': %s" %
6736                                  (self.op.iallocator, ial.info),
6737                                  errors.ECODE_NORES)
6738     if len(ial.result) != ial.required_nodes:
6739       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6740                                  " of nodes (%s), required %s" %
6741                                  (self.op.iallocator, len(ial.result),
6742                                   ial.required_nodes), errors.ECODE_FAULT)
6743     self.op.pnode = ial.result[0]
6744     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6745                  self.op.instance_name, self.op.iallocator,
6746                  utils.CommaJoin(ial.result))
6747     if ial.required_nodes == 2:
6748       self.op.snode = ial.result[1]
6749
6750   def BuildHooksEnv(self):
6751     """Build hooks env.
6752
6753     This runs on master, primary and secondary nodes of the instance.
6754
6755     """
6756     env = {
6757       "ADD_MODE": self.op.mode,
6758       }
6759     if self.op.mode == constants.INSTANCE_IMPORT:
6760       env["SRC_NODE"] = self.op.src_node
6761       env["SRC_PATH"] = self.op.src_path
6762       env["SRC_IMAGES"] = self.src_images
6763
6764     env.update(_BuildInstanceHookEnv(
6765       name=self.op.instance_name,
6766       primary_node=self.op.pnode,
6767       secondary_nodes=self.secondaries,
6768       status=self.op.start,
6769       os_type=self.op.os_type,
6770       memory=self.be_full[constants.BE_MEMORY],
6771       vcpus=self.be_full[constants.BE_VCPUS],
6772       nics=_NICListToTuple(self, self.nics),
6773       disk_template=self.op.disk_template,
6774       disks=[(d["size"], d["mode"]) for d in self.disks],
6775       bep=self.be_full,
6776       hvp=self.hv_full,
6777       hypervisor_name=self.op.hypervisor,
6778     ))
6779
6780     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6781           self.secondaries)
6782     return env, nl, nl
6783
6784   def _ReadExportInfo(self):
6785     """Reads the export information from disk.
6786
6787     It will override the opcode source node and path with the actual
6788     information, if these two were not specified before.
6789
6790     @return: the export information
6791
6792     """
6793     assert self.op.mode == constants.INSTANCE_IMPORT
6794
6795     src_node = self.op.src_node
6796     src_path = self.op.src_path
6797
6798     if src_node is None:
6799       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6800       exp_list = self.rpc.call_export_list(locked_nodes)
6801       found = False
6802       for node in exp_list:
6803         if exp_list[node].fail_msg:
6804           continue
6805         if src_path in exp_list[node].payload:
6806           found = True
6807           self.op.src_node = src_node = node
6808           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6809                                                        src_path)
6810           break
6811       if not found:
6812         raise errors.OpPrereqError("No export found for relative path %s" %
6813                                     src_path, errors.ECODE_INVAL)
6814
6815     _CheckNodeOnline(self, src_node)
6816     result = self.rpc.call_export_info(src_node, src_path)
6817     result.Raise("No export or invalid export found in dir %s" % src_path)
6818
6819     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6820     if not export_info.has_section(constants.INISECT_EXP):
6821       raise errors.ProgrammerError("Corrupted export config",
6822                                    errors.ECODE_ENVIRON)
6823
6824     ei_version = export_info.get(constants.INISECT_EXP, "version")
6825     if (int(ei_version) != constants.EXPORT_VERSION):
6826       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6827                                  (ei_version, constants.EXPORT_VERSION),
6828                                  errors.ECODE_ENVIRON)
6829     return export_info
6830
6831   def _ReadExportParams(self, einfo):
6832     """Use export parameters as defaults.
6833
6834     In case the opcode doesn't specify (as in override) some instance
6835     parameters, then try to use them from the export information, if
6836     that declares them.
6837
6838     """
6839     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6840
6841     if self.op.disk_template is None:
6842       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6843         self.op.disk_template = einfo.get(constants.INISECT_INS,
6844                                           "disk_template")
6845       else:
6846         raise errors.OpPrereqError("No disk template specified and the export"
6847                                    " is missing the disk_template information",
6848                                    errors.ECODE_INVAL)
6849
6850     if not self.op.disks:
6851       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6852         disks = []
6853         # TODO: import the disk iv_name too
6854         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6855           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6856           disks.append({"size": disk_sz})
6857         self.op.disks = disks
6858       else:
6859         raise errors.OpPrereqError("No disk info specified and the export"
6860                                    " is missing the disk information",
6861                                    errors.ECODE_INVAL)
6862
6863     if (not self.op.nics and
6864         einfo.has_option(constants.INISECT_INS, "nic_count")):
6865       nics = []
6866       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6867         ndict = {}
6868         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6869           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6870           ndict[name] = v
6871         nics.append(ndict)
6872       self.op.nics = nics
6873
6874     if (self.op.hypervisor is None and
6875         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6876       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6877     if einfo.has_section(constants.INISECT_HYP):
6878       # use the export parameters but do not override the ones
6879       # specified by the user
6880       for name, value in einfo.items(constants.INISECT_HYP):
6881         if name not in self.op.hvparams:
6882           self.op.hvparams[name] = value
6883
6884     if einfo.has_section(constants.INISECT_BEP):
6885       # use the parameters, without overriding
6886       for name, value in einfo.items(constants.INISECT_BEP):
6887         if name not in self.op.beparams:
6888           self.op.beparams[name] = value
6889     else:
6890       # try to read the parameters old style, from the main section
6891       for name in constants.BES_PARAMETERS:
6892         if (name not in self.op.beparams and
6893             einfo.has_option(constants.INISECT_INS, name)):
6894           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6895
6896     if einfo.has_section(constants.INISECT_OSP):
6897       # use the parameters, without overriding
6898       for name, value in einfo.items(constants.INISECT_OSP):
6899         if name not in self.op.osparams:
6900           self.op.osparams[name] = value
6901
6902   def _RevertToDefaults(self, cluster):
6903     """Revert the instance parameters to the default values.
6904
6905     """
6906     # hvparams
6907     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6908     for name in self.op.hvparams.keys():
6909       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6910         del self.op.hvparams[name]
6911     # beparams
6912     be_defs = cluster.SimpleFillBE({})
6913     for name in self.op.beparams.keys():
6914       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6915         del self.op.beparams[name]
6916     # nic params
6917     nic_defs = cluster.SimpleFillNIC({})
6918     for nic in self.op.nics:
6919       for name in constants.NICS_PARAMETERS:
6920         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6921           del nic[name]
6922     # osparams
6923     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6924     for name in self.op.osparams.keys():
6925       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6926         del self.op.osparams[name]
6927
6928   def CheckPrereq(self):
6929     """Check prerequisites.
6930
6931     """
6932     if self.op.mode == constants.INSTANCE_IMPORT:
6933       export_info = self._ReadExportInfo()
6934       self._ReadExportParams(export_info)
6935
6936     _CheckDiskTemplate(self.op.disk_template)
6937
6938     if (not self.cfg.GetVGName() and
6939         self.op.disk_template not in constants.DTS_NOT_LVM):
6940       raise errors.OpPrereqError("Cluster does not support lvm-based"
6941                                  " instances", errors.ECODE_STATE)
6942
6943     if self.op.hypervisor is None:
6944       self.op.hypervisor = self.cfg.GetHypervisorType()
6945
6946     cluster = self.cfg.GetClusterInfo()
6947     enabled_hvs = cluster.enabled_hypervisors
6948     if self.op.hypervisor not in enabled_hvs:
6949       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6950                                  " cluster (%s)" % (self.op.hypervisor,
6951                                   ",".join(enabled_hvs)),
6952                                  errors.ECODE_STATE)
6953
6954     # check hypervisor parameter syntax (locally)
6955     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6956     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6957                                       self.op.hvparams)
6958     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6959     hv_type.CheckParameterSyntax(filled_hvp)
6960     self.hv_full = filled_hvp
6961     # check that we don't specify global parameters on an instance
6962     _CheckGlobalHvParams(self.op.hvparams)
6963
6964     # fill and remember the beparams dict
6965     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6966     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6967
6968     # build os parameters
6969     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6970
6971     # now that hvp/bep are in final format, let's reset to defaults,
6972     # if told to do so
6973     if self.op.identify_defaults:
6974       self._RevertToDefaults(cluster)
6975
6976     # NIC buildup
6977     self.nics = []
6978     for idx, nic in enumerate(self.op.nics):
6979       nic_mode_req = nic.get("mode", None)
6980       nic_mode = nic_mode_req
6981       if nic_mode is None:
6982         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6983
6984       # in routed mode, for the first nic, the default ip is 'auto'
6985       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6986         default_ip_mode = constants.VALUE_AUTO
6987       else:
6988         default_ip_mode = constants.VALUE_NONE
6989
6990       # ip validity checks
6991       ip = nic.get("ip", default_ip_mode)
6992       if ip is None or ip.lower() == constants.VALUE_NONE:
6993         nic_ip = None
6994       elif ip.lower() == constants.VALUE_AUTO:
6995         if not self.op.name_check:
6996           raise errors.OpPrereqError("IP address set to auto but name checks"
6997                                      " have been skipped. Aborting.",
6998                                      errors.ECODE_INVAL)
6999         nic_ip = self.hostname1.ip
7000       else:
7001         if not netutils.IP4Address.IsValid(ip):
7002           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7003                                      " like a valid IP" % ip,
7004                                      errors.ECODE_INVAL)
7005         nic_ip = ip
7006
7007       # TODO: check the ip address for uniqueness
7008       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7009         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7010                                    errors.ECODE_INVAL)
7011
7012       # MAC address verification
7013       mac = nic.get("mac", constants.VALUE_AUTO)
7014       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7015         mac = utils.NormalizeAndValidateMac(mac)
7016
7017         try:
7018           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7019         except errors.ReservationError:
7020           raise errors.OpPrereqError("MAC address %s already in use"
7021                                      " in cluster" % mac,
7022                                      errors.ECODE_NOTUNIQUE)
7023
7024       # bridge verification
7025       bridge = nic.get("bridge", None)
7026       link = nic.get("link", None)
7027       if bridge and link:
7028         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7029                                    " at the same time", errors.ECODE_INVAL)
7030       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7031         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7032                                    errors.ECODE_INVAL)
7033       elif bridge:
7034         link = bridge
7035
7036       nicparams = {}
7037       if nic_mode_req:
7038         nicparams[constants.NIC_MODE] = nic_mode_req
7039       if link:
7040         nicparams[constants.NIC_LINK] = link
7041
7042       check_params = cluster.SimpleFillNIC(nicparams)
7043       objects.NIC.CheckParameterSyntax(check_params)
7044       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7045
7046     # disk checks/pre-build
7047     self.disks = []
7048     for disk in self.op.disks:
7049       mode = disk.get("mode", constants.DISK_RDWR)
7050       if mode not in constants.DISK_ACCESS_SET:
7051         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7052                                    mode, errors.ECODE_INVAL)
7053       size = disk.get("size", None)
7054       if size is None:
7055         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7056       try:
7057         size = int(size)
7058       except (TypeError, ValueError):
7059         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7060                                    errors.ECODE_INVAL)
7061       new_disk = {"size": size, "mode": mode}
7062       if "adopt" in disk:
7063         new_disk["adopt"] = disk["adopt"]
7064       self.disks.append(new_disk)
7065
7066     if self.op.mode == constants.INSTANCE_IMPORT:
7067
7068       # Check that the new instance doesn't have less disks than the export
7069       instance_disks = len(self.disks)
7070       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7071       if instance_disks < export_disks:
7072         raise errors.OpPrereqError("Not enough disks to import."
7073                                    " (instance: %d, export: %d)" %
7074                                    (instance_disks, export_disks),
7075                                    errors.ECODE_INVAL)
7076
7077       disk_images = []
7078       for idx in range(export_disks):
7079         option = 'disk%d_dump' % idx
7080         if export_info.has_option(constants.INISECT_INS, option):
7081           # FIXME: are the old os-es, disk sizes, etc. useful?
7082           export_name = export_info.get(constants.INISECT_INS, option)
7083           image = utils.PathJoin(self.op.src_path, export_name)
7084           disk_images.append(image)
7085         else:
7086           disk_images.append(False)
7087
7088       self.src_images = disk_images
7089
7090       old_name = export_info.get(constants.INISECT_INS, 'name')
7091       try:
7092         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7093       except (TypeError, ValueError), err:
7094         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7095                                    " an integer: %s" % str(err),
7096                                    errors.ECODE_STATE)
7097       if self.op.instance_name == old_name:
7098         for idx, nic in enumerate(self.nics):
7099           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7100             nic_mac_ini = 'nic%d_mac' % idx
7101             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7102
7103     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7104
7105     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7106     if self.op.ip_check:
7107       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7108         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7109                                    (self.check_ip, self.op.instance_name),
7110                                    errors.ECODE_NOTUNIQUE)
7111
7112     #### mac address generation
7113     # By generating here the mac address both the allocator and the hooks get
7114     # the real final mac address rather than the 'auto' or 'generate' value.
7115     # There is a race condition between the generation and the instance object
7116     # creation, which means that we know the mac is valid now, but we're not
7117     # sure it will be when we actually add the instance. If things go bad
7118     # adding the instance will abort because of a duplicate mac, and the
7119     # creation job will fail.
7120     for nic in self.nics:
7121       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7122         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7123
7124     #### allocator run
7125
7126     if self.op.iallocator is not None:
7127       self._RunAllocator()
7128
7129     #### node related checks
7130
7131     # check primary node
7132     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7133     assert self.pnode is not None, \
7134       "Cannot retrieve locked node %s" % self.op.pnode
7135     if pnode.offline:
7136       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7137                                  pnode.name, errors.ECODE_STATE)
7138     if pnode.drained:
7139       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7140                                  pnode.name, errors.ECODE_STATE)
7141
7142     self.secondaries = []
7143
7144     # mirror node verification
7145     if self.op.disk_template in constants.DTS_NET_MIRROR:
7146       if self.op.snode is None:
7147         raise errors.OpPrereqError("The networked disk templates need"
7148                                    " a mirror node", errors.ECODE_INVAL)
7149       if self.op.snode == pnode.name:
7150         raise errors.OpPrereqError("The secondary node cannot be the"
7151                                    " primary node.", errors.ECODE_INVAL)
7152       _CheckNodeOnline(self, self.op.snode)
7153       _CheckNodeNotDrained(self, self.op.snode)
7154       self.secondaries.append(self.op.snode)
7155
7156     nodenames = [pnode.name] + self.secondaries
7157
7158     req_size = _ComputeDiskSize(self.op.disk_template,
7159                                 self.disks)
7160
7161     # Check lv size requirements, if not adopting
7162     if req_size is not None and not self.adopt_disks:
7163       _CheckNodesFreeDisk(self, nodenames, req_size)
7164
7165     if self.adopt_disks: # instead, we must check the adoption data
7166       all_lvs = set([i["adopt"] for i in self.disks])
7167       if len(all_lvs) != len(self.disks):
7168         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7169                                    errors.ECODE_INVAL)
7170       for lv_name in all_lvs:
7171         try:
7172           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7173         except errors.ReservationError:
7174           raise errors.OpPrereqError("LV named %s used by another instance" %
7175                                      lv_name, errors.ECODE_NOTUNIQUE)
7176
7177       node_lvs = self.rpc.call_lv_list([pnode.name],
7178                                        self.cfg.GetVGName())[pnode.name]
7179       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7180       node_lvs = node_lvs.payload
7181       delta = all_lvs.difference(node_lvs.keys())
7182       if delta:
7183         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7184                                    utils.CommaJoin(delta),
7185                                    errors.ECODE_INVAL)
7186       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7187       if online_lvs:
7188         raise errors.OpPrereqError("Online logical volumes found, cannot"
7189                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7190                                    errors.ECODE_STATE)
7191       # update the size of disk based on what is found
7192       for dsk in self.disks:
7193         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7194
7195     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7196
7197     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7198     # check OS parameters (remotely)
7199     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7200
7201     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7202
7203     # memory check on primary node
7204     if self.op.start:
7205       _CheckNodeFreeMemory(self, self.pnode.name,
7206                            "creating instance %s" % self.op.instance_name,
7207                            self.be_full[constants.BE_MEMORY],
7208                            self.op.hypervisor)
7209
7210     self.dry_run_result = list(nodenames)
7211
7212   def Exec(self, feedback_fn):
7213     """Create and add the instance to the cluster.
7214
7215     """
7216     instance = self.op.instance_name
7217     pnode_name = self.pnode.name
7218
7219     ht_kind = self.op.hypervisor
7220     if ht_kind in constants.HTS_REQ_PORT:
7221       network_port = self.cfg.AllocatePort()
7222     else:
7223       network_port = None
7224
7225     if constants.ENABLE_FILE_STORAGE:
7226       # this is needed because os.path.join does not accept None arguments
7227       if self.op.file_storage_dir is None:
7228         string_file_storage_dir = ""
7229       else:
7230         string_file_storage_dir = self.op.file_storage_dir
7231
7232       # build the full file storage dir path
7233       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7234                                         string_file_storage_dir, instance)
7235     else:
7236       file_storage_dir = ""
7237
7238     disks = _GenerateDiskTemplate(self,
7239                                   self.op.disk_template,
7240                                   instance, pnode_name,
7241                                   self.secondaries,
7242                                   self.disks,
7243                                   file_storage_dir,
7244                                   self.op.file_driver,
7245                                   0)
7246
7247     iobj = objects.Instance(name=instance, os=self.op.os_type,
7248                             primary_node=pnode_name,
7249                             nics=self.nics, disks=disks,
7250                             disk_template=self.op.disk_template,
7251                             admin_up=False,
7252                             network_port=network_port,
7253                             beparams=self.op.beparams,
7254                             hvparams=self.op.hvparams,
7255                             hypervisor=self.op.hypervisor,
7256                             osparams=self.op.osparams,
7257                             )
7258
7259     if self.adopt_disks:
7260       # rename LVs to the newly-generated names; we need to construct
7261       # 'fake' LV disks with the old data, plus the new unique_id
7262       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7263       rename_to = []
7264       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7265         rename_to.append(t_dsk.logical_id)
7266         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7267         self.cfg.SetDiskID(t_dsk, pnode_name)
7268       result = self.rpc.call_blockdev_rename(pnode_name,
7269                                              zip(tmp_disks, rename_to))
7270       result.Raise("Failed to rename adoped LVs")
7271     else:
7272       feedback_fn("* creating instance disks...")
7273       try:
7274         _CreateDisks(self, iobj)
7275       except errors.OpExecError:
7276         self.LogWarning("Device creation failed, reverting...")
7277         try:
7278           _RemoveDisks(self, iobj)
7279         finally:
7280           self.cfg.ReleaseDRBDMinors(instance)
7281           raise
7282
7283     feedback_fn("adding instance %s to cluster config" % instance)
7284
7285     self.cfg.AddInstance(iobj, self.proc.GetECId())
7286
7287     # Declare that we don't want to remove the instance lock anymore, as we've
7288     # added the instance to the config
7289     del self.remove_locks[locking.LEVEL_INSTANCE]
7290     # Unlock all the nodes
7291     if self.op.mode == constants.INSTANCE_IMPORT:
7292       nodes_keep = [self.op.src_node]
7293       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7294                        if node != self.op.src_node]
7295       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7296       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7297     else:
7298       self.context.glm.release(locking.LEVEL_NODE)
7299       del self.acquired_locks[locking.LEVEL_NODE]
7300
7301     if self.op.wait_for_sync:
7302       disk_abort = not _WaitForSync(self, iobj)
7303     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7304       # make sure the disks are not degraded (still sync-ing is ok)
7305       time.sleep(15)
7306       feedback_fn("* checking mirrors status")
7307       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7308     else:
7309       disk_abort = False
7310
7311     if disk_abort:
7312       _RemoveDisks(self, iobj)
7313       self.cfg.RemoveInstance(iobj.name)
7314       # Make sure the instance lock gets removed
7315       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7316       raise errors.OpExecError("There are some degraded disks for"
7317                                " this instance")
7318
7319     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7320       if self.op.mode == constants.INSTANCE_CREATE:
7321         if not self.op.no_install:
7322           feedback_fn("* running the instance OS create scripts...")
7323           # FIXME: pass debug option from opcode to backend
7324           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7325                                                  self.op.debug_level)
7326           result.Raise("Could not add os for instance %s"
7327                        " on node %s" % (instance, pnode_name))
7328
7329       elif self.op.mode == constants.INSTANCE_IMPORT:
7330         feedback_fn("* running the instance OS import scripts...")
7331
7332         transfers = []
7333
7334         for idx, image in enumerate(self.src_images):
7335           if not image:
7336             continue
7337
7338           # FIXME: pass debug option from opcode to backend
7339           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7340                                              constants.IEIO_FILE, (image, ),
7341                                              constants.IEIO_SCRIPT,
7342                                              (iobj.disks[idx], idx),
7343                                              None)
7344           transfers.append(dt)
7345
7346         import_result = \
7347           masterd.instance.TransferInstanceData(self, feedback_fn,
7348                                                 self.op.src_node, pnode_name,
7349                                                 self.pnode.secondary_ip,
7350                                                 iobj, transfers)
7351         if not compat.all(import_result):
7352           self.LogWarning("Some disks for instance %s on node %s were not"
7353                           " imported successfully" % (instance, pnode_name))
7354
7355       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7356         feedback_fn("* preparing remote import...")
7357         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7358         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7359
7360         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7361                                                      self.source_x509_ca,
7362                                                      self._cds, timeouts)
7363         if not compat.all(disk_results):
7364           # TODO: Should the instance still be started, even if some disks
7365           # failed to import (valid for local imports, too)?
7366           self.LogWarning("Some disks for instance %s on node %s were not"
7367                           " imported successfully" % (instance, pnode_name))
7368
7369         # Run rename script on newly imported instance
7370         assert iobj.name == instance
7371         feedback_fn("Running rename script for %s" % instance)
7372         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7373                                                    self.source_instance_name,
7374                                                    self.op.debug_level)
7375         if result.fail_msg:
7376           self.LogWarning("Failed to run rename script for %s on node"
7377                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7378
7379       else:
7380         # also checked in the prereq part
7381         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7382                                      % self.op.mode)
7383
7384     if self.op.start:
7385       iobj.admin_up = True
7386       self.cfg.Update(iobj, feedback_fn)
7387       logging.info("Starting instance %s on node %s", instance, pnode_name)
7388       feedback_fn("* starting instance...")
7389       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7390       result.Raise("Could not start instance")
7391
7392     return list(iobj.all_nodes)
7393
7394
7395 class LUConnectConsole(NoHooksLU):
7396   """Connect to an instance's console.
7397
7398   This is somewhat special in that it returns the command line that
7399   you need to run on the master node in order to connect to the
7400   console.
7401
7402   """
7403   _OP_PARAMS = [
7404     _PInstanceName
7405     ]
7406   REQ_BGL = False
7407
7408   def ExpandNames(self):
7409     self._ExpandAndLockInstance()
7410
7411   def CheckPrereq(self):
7412     """Check prerequisites.
7413
7414     This checks that the instance is in the cluster.
7415
7416     """
7417     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7418     assert self.instance is not None, \
7419       "Cannot retrieve locked instance %s" % self.op.instance_name
7420     _CheckNodeOnline(self, self.instance.primary_node)
7421
7422   def Exec(self, feedback_fn):
7423     """Connect to the console of an instance
7424
7425     """
7426     instance = self.instance
7427     node = instance.primary_node
7428
7429     node_insts = self.rpc.call_instance_list([node],
7430                                              [instance.hypervisor])[node]
7431     node_insts.Raise("Can't get node information from %s" % node)
7432
7433     if instance.name not in node_insts.payload:
7434       raise errors.OpExecError("Instance %s is not running." % instance.name)
7435
7436     logging.debug("Connecting to console of %s on %s", instance.name, node)
7437
7438     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7439     cluster = self.cfg.GetClusterInfo()
7440     # beparams and hvparams are passed separately, to avoid editing the
7441     # instance and then saving the defaults in the instance itself.
7442     hvparams = cluster.FillHV(instance)
7443     beparams = cluster.FillBE(instance)
7444     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7445
7446     # build ssh cmdline
7447     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7448
7449
7450 class LUReplaceDisks(LogicalUnit):
7451   """Replace the disks of an instance.
7452
7453   """
7454   HPATH = "mirrors-replace"
7455   HTYPE = constants.HTYPE_INSTANCE
7456   _OP_PARAMS = [
7457     _PInstanceName,
7458     ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7459     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7460     ("remote_node", None, _TMaybeString),
7461     ("iallocator", None, _TMaybeString),
7462     ("early_release", False, _TBool),
7463     ]
7464   REQ_BGL = False
7465
7466   def CheckArguments(self):
7467     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7468                                   self.op.iallocator)
7469
7470   def ExpandNames(self):
7471     self._ExpandAndLockInstance()
7472
7473     if self.op.iallocator is not None:
7474       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7475
7476     elif self.op.remote_node is not None:
7477       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7478       self.op.remote_node = remote_node
7479
7480       # Warning: do not remove the locking of the new secondary here
7481       # unless DRBD8.AddChildren is changed to work in parallel;
7482       # currently it doesn't since parallel invocations of
7483       # FindUnusedMinor will conflict
7484       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7485       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7486
7487     else:
7488       self.needed_locks[locking.LEVEL_NODE] = []
7489       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7490
7491     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7492                                    self.op.iallocator, self.op.remote_node,
7493                                    self.op.disks, False, self.op.early_release)
7494
7495     self.tasklets = [self.replacer]
7496
7497   def DeclareLocks(self, level):
7498     # If we're not already locking all nodes in the set we have to declare the
7499     # instance's primary/secondary nodes.
7500     if (level == locking.LEVEL_NODE and
7501         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7502       self._LockInstancesNodes()
7503
7504   def BuildHooksEnv(self):
7505     """Build hooks env.
7506
7507     This runs on the master, the primary and all the secondaries.
7508
7509     """
7510     instance = self.replacer.instance
7511     env = {
7512       "MODE": self.op.mode,
7513       "NEW_SECONDARY": self.op.remote_node,
7514       "OLD_SECONDARY": instance.secondary_nodes[0],
7515       }
7516     env.update(_BuildInstanceHookEnvByObject(self, instance))
7517     nl = [
7518       self.cfg.GetMasterNode(),
7519       instance.primary_node,
7520       ]
7521     if self.op.remote_node is not None:
7522       nl.append(self.op.remote_node)
7523     return env, nl, nl
7524
7525
7526 class TLReplaceDisks(Tasklet):
7527   """Replaces disks for an instance.
7528
7529   Note: Locking is not within the scope of this class.
7530
7531   """
7532   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7533                disks, delay_iallocator, early_release):
7534     """Initializes this class.
7535
7536     """
7537     Tasklet.__init__(self, lu)
7538
7539     # Parameters
7540     self.instance_name = instance_name
7541     self.mode = mode
7542     self.iallocator_name = iallocator_name
7543     self.remote_node = remote_node
7544     self.disks = disks
7545     self.delay_iallocator = delay_iallocator
7546     self.early_release = early_release
7547
7548     # Runtime data
7549     self.instance = None
7550     self.new_node = None
7551     self.target_node = None
7552     self.other_node = None
7553     self.remote_node_info = None
7554     self.node_secondary_ip = None
7555
7556   @staticmethod
7557   def CheckArguments(mode, remote_node, iallocator):
7558     """Helper function for users of this class.
7559
7560     """
7561     # check for valid parameter combination
7562     if mode == constants.REPLACE_DISK_CHG:
7563       if remote_node is None and iallocator is None:
7564         raise errors.OpPrereqError("When changing the secondary either an"
7565                                    " iallocator script must be used or the"
7566                                    " new node given", errors.ECODE_INVAL)
7567
7568       if remote_node is not None and iallocator is not None:
7569         raise errors.OpPrereqError("Give either the iallocator or the new"
7570                                    " secondary, not both", errors.ECODE_INVAL)
7571
7572     elif remote_node is not None or iallocator is not None:
7573       # Not replacing the secondary
7574       raise errors.OpPrereqError("The iallocator and new node options can"
7575                                  " only be used when changing the"
7576                                  " secondary node", errors.ECODE_INVAL)
7577
7578   @staticmethod
7579   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7580     """Compute a new secondary node using an IAllocator.
7581
7582     """
7583     ial = IAllocator(lu.cfg, lu.rpc,
7584                      mode=constants.IALLOCATOR_MODE_RELOC,
7585                      name=instance_name,
7586                      relocate_from=relocate_from)
7587
7588     ial.Run(iallocator_name)
7589
7590     if not ial.success:
7591       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7592                                  " %s" % (iallocator_name, ial.info),
7593                                  errors.ECODE_NORES)
7594
7595     if len(ial.result) != ial.required_nodes:
7596       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7597                                  " of nodes (%s), required %s" %
7598                                  (iallocator_name,
7599                                   len(ial.result), ial.required_nodes),
7600                                  errors.ECODE_FAULT)
7601
7602     remote_node_name = ial.result[0]
7603
7604     lu.LogInfo("Selected new secondary for instance '%s': %s",
7605                instance_name, remote_node_name)
7606
7607     return remote_node_name
7608
7609   def _FindFaultyDisks(self, node_name):
7610     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7611                                     node_name, True)
7612
7613   def CheckPrereq(self):
7614     """Check prerequisites.
7615
7616     This checks that the instance is in the cluster.
7617
7618     """
7619     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7620     assert instance is not None, \
7621       "Cannot retrieve locked instance %s" % self.instance_name
7622
7623     if instance.disk_template != constants.DT_DRBD8:
7624       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7625                                  " instances", errors.ECODE_INVAL)
7626
7627     if len(instance.secondary_nodes) != 1:
7628       raise errors.OpPrereqError("The instance has a strange layout,"
7629                                  " expected one secondary but found %d" %
7630                                  len(instance.secondary_nodes),
7631                                  errors.ECODE_FAULT)
7632
7633     if not self.delay_iallocator:
7634       self._CheckPrereq2()
7635
7636   def _CheckPrereq2(self):
7637     """Check prerequisites, second part.
7638
7639     This function should always be part of CheckPrereq. It was separated and is
7640     now called from Exec because during node evacuation iallocator was only
7641     called with an unmodified cluster model, not taking planned changes into
7642     account.
7643
7644     """
7645     instance = self.instance
7646     secondary_node = instance.secondary_nodes[0]
7647
7648     if self.iallocator_name is None:
7649       remote_node = self.remote_node
7650     else:
7651       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7652                                        instance.name, instance.secondary_nodes)
7653
7654     if remote_node is not None:
7655       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7656       assert self.remote_node_info is not None, \
7657         "Cannot retrieve locked node %s" % remote_node
7658     else:
7659       self.remote_node_info = None
7660
7661     if remote_node == self.instance.primary_node:
7662       raise errors.OpPrereqError("The specified node is the primary node of"
7663                                  " the instance.", errors.ECODE_INVAL)
7664
7665     if remote_node == secondary_node:
7666       raise errors.OpPrereqError("The specified node is already the"
7667                                  " secondary node of the instance.",
7668                                  errors.ECODE_INVAL)
7669
7670     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7671                                     constants.REPLACE_DISK_CHG):
7672       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7673                                  errors.ECODE_INVAL)
7674
7675     if self.mode == constants.REPLACE_DISK_AUTO:
7676       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7677       faulty_secondary = self._FindFaultyDisks(secondary_node)
7678
7679       if faulty_primary and faulty_secondary:
7680         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7681                                    " one node and can not be repaired"
7682                                    " automatically" % self.instance_name,
7683                                    errors.ECODE_STATE)
7684
7685       if faulty_primary:
7686         self.disks = faulty_primary
7687         self.target_node = instance.primary_node
7688         self.other_node = secondary_node
7689         check_nodes = [self.target_node, self.other_node]
7690       elif faulty_secondary:
7691         self.disks = faulty_secondary
7692         self.target_node = secondary_node
7693         self.other_node = instance.primary_node
7694         check_nodes = [self.target_node, self.other_node]
7695       else:
7696         self.disks = []
7697         check_nodes = []
7698
7699     else:
7700       # Non-automatic modes
7701       if self.mode == constants.REPLACE_DISK_PRI:
7702         self.target_node = instance.primary_node
7703         self.other_node = secondary_node
7704         check_nodes = [self.target_node, self.other_node]
7705
7706       elif self.mode == constants.REPLACE_DISK_SEC:
7707         self.target_node = secondary_node
7708         self.other_node = instance.primary_node
7709         check_nodes = [self.target_node, self.other_node]
7710
7711       elif self.mode == constants.REPLACE_DISK_CHG:
7712         self.new_node = remote_node
7713         self.other_node = instance.primary_node
7714         self.target_node = secondary_node
7715         check_nodes = [self.new_node, self.other_node]
7716
7717         _CheckNodeNotDrained(self.lu, remote_node)
7718
7719         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7720         assert old_node_info is not None
7721         if old_node_info.offline and not self.early_release:
7722           # doesn't make sense to delay the release
7723           self.early_release = True
7724           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7725                           " early-release mode", secondary_node)
7726
7727       else:
7728         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7729                                      self.mode)
7730
7731       # If not specified all disks should be replaced
7732       if not self.disks:
7733         self.disks = range(len(self.instance.disks))
7734
7735     for node in check_nodes:
7736       _CheckNodeOnline(self.lu, node)
7737
7738     # Check whether disks are valid
7739     for disk_idx in self.disks:
7740       instance.FindDisk(disk_idx)
7741
7742     # Get secondary node IP addresses
7743     node_2nd_ip = {}
7744
7745     for node_name in [self.target_node, self.other_node, self.new_node]:
7746       if node_name is not None:
7747         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7748
7749     self.node_secondary_ip = node_2nd_ip
7750
7751   def Exec(self, feedback_fn):
7752     """Execute disk replacement.
7753
7754     This dispatches the disk replacement to the appropriate handler.
7755
7756     """
7757     if self.delay_iallocator:
7758       self._CheckPrereq2()
7759
7760     if not self.disks:
7761       feedback_fn("No disks need replacement")
7762       return
7763
7764     feedback_fn("Replacing disk(s) %s for %s" %
7765                 (utils.CommaJoin(self.disks), self.instance.name))
7766
7767     activate_disks = (not self.instance.admin_up)
7768
7769     # Activate the instance disks if we're replacing them on a down instance
7770     if activate_disks:
7771       _StartInstanceDisks(self.lu, self.instance, True)
7772
7773     try:
7774       # Should we replace the secondary node?
7775       if self.new_node is not None:
7776         fn = self._ExecDrbd8Secondary
7777       else:
7778         fn = self._ExecDrbd8DiskOnly
7779
7780       return fn(feedback_fn)
7781
7782     finally:
7783       # Deactivate the instance disks if we're replacing them on a
7784       # down instance
7785       if activate_disks:
7786         _SafeShutdownInstanceDisks(self.lu, self.instance)
7787
7788   def _CheckVolumeGroup(self, nodes):
7789     self.lu.LogInfo("Checking volume groups")
7790
7791     vgname = self.cfg.GetVGName()
7792
7793     # Make sure volume group exists on all involved nodes
7794     results = self.rpc.call_vg_list(nodes)
7795     if not results:
7796       raise errors.OpExecError("Can't list volume groups on the nodes")
7797
7798     for node in nodes:
7799       res = results[node]
7800       res.Raise("Error checking node %s" % node)
7801       if vgname not in res.payload:
7802         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7803                                  (vgname, node))
7804
7805   def _CheckDisksExistence(self, nodes):
7806     # Check disk existence
7807     for idx, dev in enumerate(self.instance.disks):
7808       if idx not in self.disks:
7809         continue
7810
7811       for node in nodes:
7812         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7813         self.cfg.SetDiskID(dev, node)
7814
7815         result = self.rpc.call_blockdev_find(node, dev)
7816
7817         msg = result.fail_msg
7818         if msg or not result.payload:
7819           if not msg:
7820             msg = "disk not found"
7821           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7822                                    (idx, node, msg))
7823
7824   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7825     for idx, dev in enumerate(self.instance.disks):
7826       if idx not in self.disks:
7827         continue
7828
7829       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7830                       (idx, node_name))
7831
7832       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7833                                    ldisk=ldisk):
7834         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7835                                  " replace disks for instance %s" %
7836                                  (node_name, self.instance.name))
7837
7838   def _CreateNewStorage(self, node_name):
7839     vgname = self.cfg.GetVGName()
7840     iv_names = {}
7841
7842     for idx, dev in enumerate(self.instance.disks):
7843       if idx not in self.disks:
7844         continue
7845
7846       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7847
7848       self.cfg.SetDiskID(dev, node_name)
7849
7850       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7851       names = _GenerateUniqueNames(self.lu, lv_names)
7852
7853       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7854                              logical_id=(vgname, names[0]))
7855       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7856                              logical_id=(vgname, names[1]))
7857
7858       new_lvs = [lv_data, lv_meta]
7859       old_lvs = dev.children
7860       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7861
7862       # we pass force_create=True to force the LVM creation
7863       for new_lv in new_lvs:
7864         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7865                         _GetInstanceInfoText(self.instance), False)
7866
7867     return iv_names
7868
7869   def _CheckDevices(self, node_name, iv_names):
7870     for name, (dev, _, _) in iv_names.iteritems():
7871       self.cfg.SetDiskID(dev, node_name)
7872
7873       result = self.rpc.call_blockdev_find(node_name, dev)
7874
7875       msg = result.fail_msg
7876       if msg or not result.payload:
7877         if not msg:
7878           msg = "disk not found"
7879         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7880                                  (name, msg))
7881
7882       if result.payload.is_degraded:
7883         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7884
7885   def _RemoveOldStorage(self, node_name, iv_names):
7886     for name, (_, old_lvs, _) in iv_names.iteritems():
7887       self.lu.LogInfo("Remove logical volumes for %s" % name)
7888
7889       for lv in old_lvs:
7890         self.cfg.SetDiskID(lv, node_name)
7891
7892         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7893         if msg:
7894           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7895                              hint="remove unused LVs manually")
7896
7897   def _ReleaseNodeLock(self, node_name):
7898     """Releases the lock for a given node."""
7899     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7900
7901   def _ExecDrbd8DiskOnly(self, feedback_fn):
7902     """Replace a disk on the primary or secondary for DRBD 8.
7903
7904     The algorithm for replace is quite complicated:
7905
7906       1. for each disk to be replaced:
7907
7908         1. create new LVs on the target node with unique names
7909         1. detach old LVs from the drbd device
7910         1. rename old LVs to name_replaced.<time_t>
7911         1. rename new LVs to old LVs
7912         1. attach the new LVs (with the old names now) to the drbd device
7913
7914       1. wait for sync across all devices
7915
7916       1. for each modified disk:
7917
7918         1. remove old LVs (which have the name name_replaces.<time_t>)
7919
7920     Failures are not very well handled.
7921
7922     """
7923     steps_total = 6
7924
7925     # Step: check device activation
7926     self.lu.LogStep(1, steps_total, "Check device existence")
7927     self._CheckDisksExistence([self.other_node, self.target_node])
7928     self._CheckVolumeGroup([self.target_node, self.other_node])
7929
7930     # Step: check other node consistency
7931     self.lu.LogStep(2, steps_total, "Check peer consistency")
7932     self._CheckDisksConsistency(self.other_node,
7933                                 self.other_node == self.instance.primary_node,
7934                                 False)
7935
7936     # Step: create new storage
7937     self.lu.LogStep(3, steps_total, "Allocate new storage")
7938     iv_names = self._CreateNewStorage(self.target_node)
7939
7940     # Step: for each lv, detach+rename*2+attach
7941     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7942     for dev, old_lvs, new_lvs in iv_names.itervalues():
7943       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7944
7945       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7946                                                      old_lvs)
7947       result.Raise("Can't detach drbd from local storage on node"
7948                    " %s for device %s" % (self.target_node, dev.iv_name))
7949       #dev.children = []
7950       #cfg.Update(instance)
7951
7952       # ok, we created the new LVs, so now we know we have the needed
7953       # storage; as such, we proceed on the target node to rename
7954       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7955       # using the assumption that logical_id == physical_id (which in
7956       # turn is the unique_id on that node)
7957
7958       # FIXME(iustin): use a better name for the replaced LVs
7959       temp_suffix = int(time.time())
7960       ren_fn = lambda d, suff: (d.physical_id[0],
7961                                 d.physical_id[1] + "_replaced-%s" % suff)
7962
7963       # Build the rename list based on what LVs exist on the node
7964       rename_old_to_new = []
7965       for to_ren in old_lvs:
7966         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7967         if not result.fail_msg and result.payload:
7968           # device exists
7969           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7970
7971       self.lu.LogInfo("Renaming the old LVs on the target node")
7972       result = self.rpc.call_blockdev_rename(self.target_node,
7973                                              rename_old_to_new)
7974       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7975
7976       # Now we rename the new LVs to the old LVs
7977       self.lu.LogInfo("Renaming the new LVs on the target node")
7978       rename_new_to_old = [(new, old.physical_id)
7979                            for old, new in zip(old_lvs, new_lvs)]
7980       result = self.rpc.call_blockdev_rename(self.target_node,
7981                                              rename_new_to_old)
7982       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7983
7984       for old, new in zip(old_lvs, new_lvs):
7985         new.logical_id = old.logical_id
7986         self.cfg.SetDiskID(new, self.target_node)
7987
7988       for disk in old_lvs:
7989         disk.logical_id = ren_fn(disk, temp_suffix)
7990         self.cfg.SetDiskID(disk, self.target_node)
7991
7992       # Now that the new lvs have the old name, we can add them to the device
7993       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7994       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7995                                                   new_lvs)
7996       msg = result.fail_msg
7997       if msg:
7998         for new_lv in new_lvs:
7999           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8000                                                new_lv).fail_msg
8001           if msg2:
8002             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8003                                hint=("cleanup manually the unused logical"
8004                                      "volumes"))
8005         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8006
8007       dev.children = new_lvs
8008
8009       self.cfg.Update(self.instance, feedback_fn)
8010
8011     cstep = 5
8012     if self.early_release:
8013       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8014       cstep += 1
8015       self._RemoveOldStorage(self.target_node, iv_names)
8016       # WARNING: we release both node locks here, do not do other RPCs
8017       # than WaitForSync to the primary node
8018       self._ReleaseNodeLock([self.target_node, self.other_node])
8019
8020     # Wait for sync
8021     # This can fail as the old devices are degraded and _WaitForSync
8022     # does a combined result over all disks, so we don't check its return value
8023     self.lu.LogStep(cstep, steps_total, "Sync devices")
8024     cstep += 1
8025     _WaitForSync(self.lu, self.instance)
8026
8027     # Check all devices manually
8028     self._CheckDevices(self.instance.primary_node, iv_names)
8029
8030     # Step: remove old storage
8031     if not self.early_release:
8032       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8033       cstep += 1
8034       self._RemoveOldStorage(self.target_node, iv_names)
8035
8036   def _ExecDrbd8Secondary(self, feedback_fn):
8037     """Replace the secondary node for DRBD 8.
8038
8039     The algorithm for replace is quite complicated:
8040       - for all disks of the instance:
8041         - create new LVs on the new node with same names
8042         - shutdown the drbd device on the old secondary
8043         - disconnect the drbd network on the primary
8044         - create the drbd device on the new secondary
8045         - network attach the drbd on the primary, using an artifice:
8046           the drbd code for Attach() will connect to the network if it
8047           finds a device which is connected to the good local disks but
8048           not network enabled
8049       - wait for sync across all devices
8050       - remove all disks from the old secondary
8051
8052     Failures are not very well handled.
8053
8054     """
8055     steps_total = 6
8056
8057     # Step: check device activation
8058     self.lu.LogStep(1, steps_total, "Check device existence")
8059     self._CheckDisksExistence([self.instance.primary_node])
8060     self._CheckVolumeGroup([self.instance.primary_node])
8061
8062     # Step: check other node consistency
8063     self.lu.LogStep(2, steps_total, "Check peer consistency")
8064     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8065
8066     # Step: create new storage
8067     self.lu.LogStep(3, steps_total, "Allocate new storage")
8068     for idx, dev in enumerate(self.instance.disks):
8069       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8070                       (self.new_node, idx))
8071       # we pass force_create=True to force LVM creation
8072       for new_lv in dev.children:
8073         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8074                         _GetInstanceInfoText(self.instance), False)
8075
8076     # Step 4: dbrd minors and drbd setups changes
8077     # after this, we must manually remove the drbd minors on both the
8078     # error and the success paths
8079     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8080     minors = self.cfg.AllocateDRBDMinor([self.new_node
8081                                          for dev in self.instance.disks],
8082                                         self.instance.name)
8083     logging.debug("Allocated minors %r", minors)
8084
8085     iv_names = {}
8086     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8087       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8088                       (self.new_node, idx))
8089       # create new devices on new_node; note that we create two IDs:
8090       # one without port, so the drbd will be activated without
8091       # networking information on the new node at this stage, and one
8092       # with network, for the latter activation in step 4
8093       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8094       if self.instance.primary_node == o_node1:
8095         p_minor = o_minor1
8096       else:
8097         assert self.instance.primary_node == o_node2, "Three-node instance?"
8098         p_minor = o_minor2
8099
8100       new_alone_id = (self.instance.primary_node, self.new_node, None,
8101                       p_minor, new_minor, o_secret)
8102       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8103                     p_minor, new_minor, o_secret)
8104
8105       iv_names[idx] = (dev, dev.children, new_net_id)
8106       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8107                     new_net_id)
8108       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8109                               logical_id=new_alone_id,
8110                               children=dev.children,
8111                               size=dev.size)
8112       try:
8113         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8114                               _GetInstanceInfoText(self.instance), False)
8115       except errors.GenericError:
8116         self.cfg.ReleaseDRBDMinors(self.instance.name)
8117         raise
8118
8119     # We have new devices, shutdown the drbd on the old secondary
8120     for idx, dev in enumerate(self.instance.disks):
8121       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8122       self.cfg.SetDiskID(dev, self.target_node)
8123       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8124       if msg:
8125         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8126                            "node: %s" % (idx, msg),
8127                            hint=("Please cleanup this device manually as"
8128                                  " soon as possible"))
8129
8130     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8131     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8132                                                self.node_secondary_ip,
8133                                                self.instance.disks)\
8134                                               [self.instance.primary_node]
8135
8136     msg = result.fail_msg
8137     if msg:
8138       # detaches didn't succeed (unlikely)
8139       self.cfg.ReleaseDRBDMinors(self.instance.name)
8140       raise errors.OpExecError("Can't detach the disks from the network on"
8141                                " old node: %s" % (msg,))
8142
8143     # if we managed to detach at least one, we update all the disks of
8144     # the instance to point to the new secondary
8145     self.lu.LogInfo("Updating instance configuration")
8146     for dev, _, new_logical_id in iv_names.itervalues():
8147       dev.logical_id = new_logical_id
8148       self.cfg.SetDiskID(dev, self.instance.primary_node)
8149
8150     self.cfg.Update(self.instance, feedback_fn)
8151
8152     # and now perform the drbd attach
8153     self.lu.LogInfo("Attaching primary drbds to new secondary"
8154                     " (standalone => connected)")
8155     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8156                                             self.new_node],
8157                                            self.node_secondary_ip,
8158                                            self.instance.disks,
8159                                            self.instance.name,
8160                                            False)
8161     for to_node, to_result in result.items():
8162       msg = to_result.fail_msg
8163       if msg:
8164         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8165                            to_node, msg,
8166                            hint=("please do a gnt-instance info to see the"
8167                                  " status of disks"))
8168     cstep = 5
8169     if self.early_release:
8170       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8171       cstep += 1
8172       self._RemoveOldStorage(self.target_node, iv_names)
8173       # WARNING: we release all node locks here, do not do other RPCs
8174       # than WaitForSync to the primary node
8175       self._ReleaseNodeLock([self.instance.primary_node,
8176                              self.target_node,
8177                              self.new_node])
8178
8179     # Wait for sync
8180     # This can fail as the old devices are degraded and _WaitForSync
8181     # does a combined result over all disks, so we don't check its return value
8182     self.lu.LogStep(cstep, steps_total, "Sync devices")
8183     cstep += 1
8184     _WaitForSync(self.lu, self.instance)
8185
8186     # Check all devices manually
8187     self._CheckDevices(self.instance.primary_node, iv_names)
8188
8189     # Step: remove old storage
8190     if not self.early_release:
8191       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8192       self._RemoveOldStorage(self.target_node, iv_names)
8193
8194
8195 class LURepairNodeStorage(NoHooksLU):
8196   """Repairs the volume group on a node.
8197
8198   """
8199   _OP_PARAMS = [
8200     _PNodeName,
8201     ("storage_type", _NoDefault, _CheckStorageType),
8202     ("name", _NoDefault, _TNonEmptyString),
8203     ("ignore_consistency", False, _TBool),
8204     ]
8205   REQ_BGL = False
8206
8207   def CheckArguments(self):
8208     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8209
8210     storage_type = self.op.storage_type
8211
8212     if (constants.SO_FIX_CONSISTENCY not in
8213         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8214       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8215                                  " repaired" % storage_type,
8216                                  errors.ECODE_INVAL)
8217
8218   def ExpandNames(self):
8219     self.needed_locks = {
8220       locking.LEVEL_NODE: [self.op.node_name],
8221       }
8222
8223   def _CheckFaultyDisks(self, instance, node_name):
8224     """Ensure faulty disks abort the opcode or at least warn."""
8225     try:
8226       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8227                                   node_name, True):
8228         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8229                                    " node '%s'" % (instance.name, node_name),
8230                                    errors.ECODE_STATE)
8231     except errors.OpPrereqError, err:
8232       if self.op.ignore_consistency:
8233         self.proc.LogWarning(str(err.args[0]))
8234       else:
8235         raise
8236
8237   def CheckPrereq(self):
8238     """Check prerequisites.
8239
8240     """
8241     # Check whether any instance on this node has faulty disks
8242     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8243       if not inst.admin_up:
8244         continue
8245       check_nodes = set(inst.all_nodes)
8246       check_nodes.discard(self.op.node_name)
8247       for inst_node_name in check_nodes:
8248         self._CheckFaultyDisks(inst, inst_node_name)
8249
8250   def Exec(self, feedback_fn):
8251     feedback_fn("Repairing storage unit '%s' on %s ..." %
8252                 (self.op.name, self.op.node_name))
8253
8254     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8255     result = self.rpc.call_storage_execute(self.op.node_name,
8256                                            self.op.storage_type, st_args,
8257                                            self.op.name,
8258                                            constants.SO_FIX_CONSISTENCY)
8259     result.Raise("Failed to repair storage unit '%s' on %s" %
8260                  (self.op.name, self.op.node_name))
8261
8262
8263 class LUNodeEvacuationStrategy(NoHooksLU):
8264   """Computes the node evacuation strategy.
8265
8266   """
8267   _OP_PARAMS = [
8268     ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8269     ("remote_node", None, _TMaybeString),
8270     ("iallocator", None, _TMaybeString),
8271     ]
8272   REQ_BGL = False
8273
8274   def CheckArguments(self):
8275     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8276
8277   def ExpandNames(self):
8278     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8279     self.needed_locks = locks = {}
8280     if self.op.remote_node is None:
8281       locks[locking.LEVEL_NODE] = locking.ALL_SET
8282     else:
8283       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8284       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8285
8286   def Exec(self, feedback_fn):
8287     if self.op.remote_node is not None:
8288       instances = []
8289       for node in self.op.nodes:
8290         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8291       result = []
8292       for i in instances:
8293         if i.primary_node == self.op.remote_node:
8294           raise errors.OpPrereqError("Node %s is the primary node of"
8295                                      " instance %s, cannot use it as"
8296                                      " secondary" %
8297                                      (self.op.remote_node, i.name),
8298                                      errors.ECODE_INVAL)
8299         result.append([i.name, self.op.remote_node])
8300     else:
8301       ial = IAllocator(self.cfg, self.rpc,
8302                        mode=constants.IALLOCATOR_MODE_MEVAC,
8303                        evac_nodes=self.op.nodes)
8304       ial.Run(self.op.iallocator, validate=True)
8305       if not ial.success:
8306         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8307                                  errors.ECODE_NORES)
8308       result = ial.result
8309     return result
8310
8311
8312 class LUGrowDisk(LogicalUnit):
8313   """Grow a disk of an instance.
8314
8315   """
8316   HPATH = "disk-grow"
8317   HTYPE = constants.HTYPE_INSTANCE
8318   _OP_PARAMS = [
8319     _PInstanceName,
8320     ("disk", _NoDefault, _TInt),
8321     ("amount", _NoDefault, _TInt),
8322     ("wait_for_sync", True, _TBool),
8323     ]
8324   REQ_BGL = False
8325
8326   def ExpandNames(self):
8327     self._ExpandAndLockInstance()
8328     self.needed_locks[locking.LEVEL_NODE] = []
8329     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8330
8331   def DeclareLocks(self, level):
8332     if level == locking.LEVEL_NODE:
8333       self._LockInstancesNodes()
8334
8335   def BuildHooksEnv(self):
8336     """Build hooks env.
8337
8338     This runs on the master, the primary and all the secondaries.
8339
8340     """
8341     env = {
8342       "DISK": self.op.disk,
8343       "AMOUNT": self.op.amount,
8344       }
8345     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8346     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8347     return env, nl, nl
8348
8349   def CheckPrereq(self):
8350     """Check prerequisites.
8351
8352     This checks that the instance is in the cluster.
8353
8354     """
8355     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8356     assert instance is not None, \
8357       "Cannot retrieve locked instance %s" % self.op.instance_name
8358     nodenames = list(instance.all_nodes)
8359     for node in nodenames:
8360       _CheckNodeOnline(self, node)
8361
8362     self.instance = instance
8363
8364     if instance.disk_template not in constants.DTS_GROWABLE:
8365       raise errors.OpPrereqError("Instance's disk layout does not support"
8366                                  " growing.", errors.ECODE_INVAL)
8367
8368     self.disk = instance.FindDisk(self.op.disk)
8369
8370     if instance.disk_template != constants.DT_FILE:
8371       # TODO: check the free disk space for file, when that feature will be
8372       # supported
8373       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8374
8375   def Exec(self, feedback_fn):
8376     """Execute disk grow.
8377
8378     """
8379     instance = self.instance
8380     disk = self.disk
8381
8382     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8383     if not disks_ok:
8384       raise errors.OpExecError("Cannot activate block device to grow")
8385
8386     for node in instance.all_nodes:
8387       self.cfg.SetDiskID(disk, node)
8388       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8389       result.Raise("Grow request failed to node %s" % node)
8390
8391       # TODO: Rewrite code to work properly
8392       # DRBD goes into sync mode for a short amount of time after executing the
8393       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8394       # calling "resize" in sync mode fails. Sleeping for a short amount of
8395       # time is a work-around.
8396       time.sleep(5)
8397
8398     disk.RecordGrow(self.op.amount)
8399     self.cfg.Update(instance, feedback_fn)
8400     if self.op.wait_for_sync:
8401       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8402       if disk_abort:
8403         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8404                              " status.\nPlease check the instance.")
8405       if not instance.admin_up:
8406         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8407     elif not instance.admin_up:
8408       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8409                            " not supposed to be running because no wait for"
8410                            " sync mode was requested.")
8411
8412
8413 class LUQueryInstanceData(NoHooksLU):
8414   """Query runtime instance data.
8415
8416   """
8417   _OP_PARAMS = [
8418     ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8419     ("static", False, _TBool),
8420     ]
8421   REQ_BGL = False
8422
8423   def ExpandNames(self):
8424     self.needed_locks = {}
8425     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8426
8427     if self.op.instances:
8428       self.wanted_names = []
8429       for name in self.op.instances:
8430         full_name = _ExpandInstanceName(self.cfg, name)
8431         self.wanted_names.append(full_name)
8432       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8433     else:
8434       self.wanted_names = None
8435       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8436
8437     self.needed_locks[locking.LEVEL_NODE] = []
8438     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8439
8440   def DeclareLocks(self, level):
8441     if level == locking.LEVEL_NODE:
8442       self._LockInstancesNodes()
8443
8444   def CheckPrereq(self):
8445     """Check prerequisites.
8446
8447     This only checks the optional instance list against the existing names.
8448
8449     """
8450     if self.wanted_names is None:
8451       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8452
8453     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8454                              in self.wanted_names]
8455
8456   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8457     """Returns the status of a block device
8458
8459     """
8460     if self.op.static or not node:
8461       return None
8462
8463     self.cfg.SetDiskID(dev, node)
8464
8465     result = self.rpc.call_blockdev_find(node, dev)
8466     if result.offline:
8467       return None
8468
8469     result.Raise("Can't compute disk status for %s" % instance_name)
8470
8471     status = result.payload
8472     if status is None:
8473       return None
8474
8475     return (status.dev_path, status.major, status.minor,
8476             status.sync_percent, status.estimated_time,
8477             status.is_degraded, status.ldisk_status)
8478
8479   def _ComputeDiskStatus(self, instance, snode, dev):
8480     """Compute block device status.
8481
8482     """
8483     if dev.dev_type in constants.LDS_DRBD:
8484       # we change the snode then (otherwise we use the one passed in)
8485       if dev.logical_id[0] == instance.primary_node:
8486         snode = dev.logical_id[1]
8487       else:
8488         snode = dev.logical_id[0]
8489
8490     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8491                                               instance.name, dev)
8492     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8493
8494     if dev.children:
8495       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8496                       for child in dev.children]
8497     else:
8498       dev_children = []
8499
8500     data = {
8501       "iv_name": dev.iv_name,
8502       "dev_type": dev.dev_type,
8503       "logical_id": dev.logical_id,
8504       "physical_id": dev.physical_id,
8505       "pstatus": dev_pstatus,
8506       "sstatus": dev_sstatus,
8507       "children": dev_children,
8508       "mode": dev.mode,
8509       "size": dev.size,
8510       }
8511
8512     return data
8513
8514   def Exec(self, feedback_fn):
8515     """Gather and return data"""
8516     result = {}
8517
8518     cluster = self.cfg.GetClusterInfo()
8519
8520     for instance in self.wanted_instances:
8521       if not self.op.static:
8522         remote_info = self.rpc.call_instance_info(instance.primary_node,
8523                                                   instance.name,
8524                                                   instance.hypervisor)
8525         remote_info.Raise("Error checking node %s" % instance.primary_node)
8526         remote_info = remote_info.payload
8527         if remote_info and "state" in remote_info:
8528           remote_state = "up"
8529         else:
8530           remote_state = "down"
8531       else:
8532         remote_state = None
8533       if instance.admin_up:
8534         config_state = "up"
8535       else:
8536         config_state = "down"
8537
8538       disks = [self._ComputeDiskStatus(instance, None, device)
8539                for device in instance.disks]
8540
8541       idict = {
8542         "name": instance.name,
8543         "config_state": config_state,
8544         "run_state": remote_state,
8545         "pnode": instance.primary_node,
8546         "snodes": instance.secondary_nodes,
8547         "os": instance.os,
8548         # this happens to be the same format used for hooks
8549         "nics": _NICListToTuple(self, instance.nics),
8550         "disk_template": instance.disk_template,
8551         "disks": disks,
8552         "hypervisor": instance.hypervisor,
8553         "network_port": instance.network_port,
8554         "hv_instance": instance.hvparams,
8555         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8556         "be_instance": instance.beparams,
8557         "be_actual": cluster.FillBE(instance),
8558         "os_instance": instance.osparams,
8559         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8560         "serial_no": instance.serial_no,
8561         "mtime": instance.mtime,
8562         "ctime": instance.ctime,
8563         "uuid": instance.uuid,
8564         }
8565
8566       result[instance.name] = idict
8567
8568     return result
8569
8570
8571 class LUSetInstanceParams(LogicalUnit):
8572   """Modifies an instances's parameters.
8573
8574   """
8575   HPATH = "instance-modify"
8576   HTYPE = constants.HTYPE_INSTANCE
8577   _OP_PARAMS = [
8578     _PInstanceName,
8579     ("nics", _EmptyList, _TList),
8580     ("disks", _EmptyList, _TList),
8581     ("beparams", _EmptyDict, _TDict),
8582     ("hvparams", _EmptyDict, _TDict),
8583     ("disk_template", None, _TMaybeString),
8584     ("remote_node", None, _TMaybeString),
8585     ("os_name", None, _TMaybeString),
8586     ("force_variant", False, _TBool),
8587     ("osparams", None, _TOr(_TDict, _TNone)),
8588     _PForce,
8589     ]
8590   REQ_BGL = False
8591
8592   def CheckArguments(self):
8593     if not (self.op.nics or self.op.disks or self.op.disk_template or
8594             self.op.hvparams or self.op.beparams or self.op.os_name):
8595       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8596
8597     if self.op.hvparams:
8598       _CheckGlobalHvParams(self.op.hvparams)
8599
8600     # Disk validation
8601     disk_addremove = 0
8602     for disk_op, disk_dict in self.op.disks:
8603       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8604       if disk_op == constants.DDM_REMOVE:
8605         disk_addremove += 1
8606         continue
8607       elif disk_op == constants.DDM_ADD:
8608         disk_addremove += 1
8609       else:
8610         if not isinstance(disk_op, int):
8611           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8612         if not isinstance(disk_dict, dict):
8613           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8614           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8615
8616       if disk_op == constants.DDM_ADD:
8617         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8618         if mode not in constants.DISK_ACCESS_SET:
8619           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8620                                      errors.ECODE_INVAL)
8621         size = disk_dict.get('size', None)
8622         if size is None:
8623           raise errors.OpPrereqError("Required disk parameter size missing",
8624                                      errors.ECODE_INVAL)
8625         try:
8626           size = int(size)
8627         except (TypeError, ValueError), err:
8628           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8629                                      str(err), errors.ECODE_INVAL)
8630         disk_dict['size'] = size
8631       else:
8632         # modification of disk
8633         if 'size' in disk_dict:
8634           raise errors.OpPrereqError("Disk size change not possible, use"
8635                                      " grow-disk", errors.ECODE_INVAL)
8636
8637     if disk_addremove > 1:
8638       raise errors.OpPrereqError("Only one disk add or remove operation"
8639                                  " supported at a time", errors.ECODE_INVAL)
8640
8641     if self.op.disks and self.op.disk_template is not None:
8642       raise errors.OpPrereqError("Disk template conversion and other disk"
8643                                  " changes not supported at the same time",
8644                                  errors.ECODE_INVAL)
8645
8646     if self.op.disk_template:
8647       _CheckDiskTemplate(self.op.disk_template)
8648       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8649           self.op.remote_node is None):
8650         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8651                                    " one requires specifying a secondary node",
8652                                    errors.ECODE_INVAL)
8653
8654     # NIC validation
8655     nic_addremove = 0
8656     for nic_op, nic_dict in self.op.nics:
8657       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8658       if nic_op == constants.DDM_REMOVE:
8659         nic_addremove += 1
8660         continue
8661       elif nic_op == constants.DDM_ADD:
8662         nic_addremove += 1
8663       else:
8664         if not isinstance(nic_op, int):
8665           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8666         if not isinstance(nic_dict, dict):
8667           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8668           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8669
8670       # nic_dict should be a dict
8671       nic_ip = nic_dict.get('ip', None)
8672       if nic_ip is not None:
8673         if nic_ip.lower() == constants.VALUE_NONE:
8674           nic_dict['ip'] = None
8675         else:
8676           if not netutils.IP4Address.IsValid(nic_ip):
8677             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8678                                        errors.ECODE_INVAL)
8679
8680       nic_bridge = nic_dict.get('bridge', None)
8681       nic_link = nic_dict.get('link', None)
8682       if nic_bridge and nic_link:
8683         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8684                                    " at the same time", errors.ECODE_INVAL)
8685       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8686         nic_dict['bridge'] = None
8687       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8688         nic_dict['link'] = None
8689
8690       if nic_op == constants.DDM_ADD:
8691         nic_mac = nic_dict.get('mac', None)
8692         if nic_mac is None:
8693           nic_dict['mac'] = constants.VALUE_AUTO
8694
8695       if 'mac' in nic_dict:
8696         nic_mac = nic_dict['mac']
8697         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8698           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8699
8700         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8701           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8702                                      " modifying an existing nic",
8703                                      errors.ECODE_INVAL)
8704
8705     if nic_addremove > 1:
8706       raise errors.OpPrereqError("Only one NIC add or remove operation"
8707                                  " supported at a time", errors.ECODE_INVAL)
8708
8709   def ExpandNames(self):
8710     self._ExpandAndLockInstance()
8711     self.needed_locks[locking.LEVEL_NODE] = []
8712     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8713
8714   def DeclareLocks(self, level):
8715     if level == locking.LEVEL_NODE:
8716       self._LockInstancesNodes()
8717       if self.op.disk_template and self.op.remote_node:
8718         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8719         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8720
8721   def BuildHooksEnv(self):
8722     """Build hooks env.
8723
8724     This runs on the master, primary and secondaries.
8725
8726     """
8727     args = dict()
8728     if constants.BE_MEMORY in self.be_new:
8729       args['memory'] = self.be_new[constants.BE_MEMORY]
8730     if constants.BE_VCPUS in self.be_new:
8731       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8732     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8733     # information at all.
8734     if self.op.nics:
8735       args['nics'] = []
8736       nic_override = dict(self.op.nics)
8737       for idx, nic in enumerate(self.instance.nics):
8738         if idx in nic_override:
8739           this_nic_override = nic_override[idx]
8740         else:
8741           this_nic_override = {}
8742         if 'ip' in this_nic_override:
8743           ip = this_nic_override['ip']
8744         else:
8745           ip = nic.ip
8746         if 'mac' in this_nic_override:
8747           mac = this_nic_override['mac']
8748         else:
8749           mac = nic.mac
8750         if idx in self.nic_pnew:
8751           nicparams = self.nic_pnew[idx]
8752         else:
8753           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8754         mode = nicparams[constants.NIC_MODE]
8755         link = nicparams[constants.NIC_LINK]
8756         args['nics'].append((ip, mac, mode, link))
8757       if constants.DDM_ADD in nic_override:
8758         ip = nic_override[constants.DDM_ADD].get('ip', None)
8759         mac = nic_override[constants.DDM_ADD]['mac']
8760         nicparams = self.nic_pnew[constants.DDM_ADD]
8761         mode = nicparams[constants.NIC_MODE]
8762         link = nicparams[constants.NIC_LINK]
8763         args['nics'].append((ip, mac, mode, link))
8764       elif constants.DDM_REMOVE in nic_override:
8765         del args['nics'][-1]
8766
8767     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8768     if self.op.disk_template:
8769       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8770     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8771     return env, nl, nl
8772
8773   def CheckPrereq(self):
8774     """Check prerequisites.
8775
8776     This only checks the instance list against the existing names.
8777
8778     """
8779     # checking the new params on the primary/secondary nodes
8780
8781     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8782     cluster = self.cluster = self.cfg.GetClusterInfo()
8783     assert self.instance is not None, \
8784       "Cannot retrieve locked instance %s" % self.op.instance_name
8785     pnode = instance.primary_node
8786     nodelist = list(instance.all_nodes)
8787
8788     # OS change
8789     if self.op.os_name and not self.op.force:
8790       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8791                       self.op.force_variant)
8792       instance_os = self.op.os_name
8793     else:
8794       instance_os = instance.os
8795
8796     if self.op.disk_template:
8797       if instance.disk_template == self.op.disk_template:
8798         raise errors.OpPrereqError("Instance already has disk template %s" %
8799                                    instance.disk_template, errors.ECODE_INVAL)
8800
8801       if (instance.disk_template,
8802           self.op.disk_template) not in self._DISK_CONVERSIONS:
8803         raise errors.OpPrereqError("Unsupported disk template conversion from"
8804                                    " %s to %s" % (instance.disk_template,
8805                                                   self.op.disk_template),
8806                                    errors.ECODE_INVAL)
8807       _CheckInstanceDown(self, instance, "cannot change disk template")
8808       if self.op.disk_template in constants.DTS_NET_MIRROR:
8809         if self.op.remote_node == pnode:
8810           raise errors.OpPrereqError("Given new secondary node %s is the same"
8811                                      " as the primary node of the instance" %
8812                                      self.op.remote_node, errors.ECODE_STATE)
8813         _CheckNodeOnline(self, self.op.remote_node)
8814         _CheckNodeNotDrained(self, self.op.remote_node)
8815         disks = [{"size": d.size} for d in instance.disks]
8816         required = _ComputeDiskSize(self.op.disk_template, disks)
8817         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8818
8819     # hvparams processing
8820     if self.op.hvparams:
8821       hv_type = instance.hypervisor
8822       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8823       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8824       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8825
8826       # local check
8827       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8828       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8829       self.hv_new = hv_new # the new actual values
8830       self.hv_inst = i_hvdict # the new dict (without defaults)
8831     else:
8832       self.hv_new = self.hv_inst = {}
8833
8834     # beparams processing
8835     if self.op.beparams:
8836       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8837                                    use_none=True)
8838       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8839       be_new = cluster.SimpleFillBE(i_bedict)
8840       self.be_new = be_new # the new actual values
8841       self.be_inst = i_bedict # the new dict (without defaults)
8842     else:
8843       self.be_new = self.be_inst = {}
8844
8845     # osparams processing
8846     if self.op.osparams:
8847       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8848       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8849       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8850       self.os_inst = i_osdict # the new dict (without defaults)
8851     else:
8852       self.os_new = self.os_inst = {}
8853
8854     self.warn = []
8855
8856     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8857       mem_check_list = [pnode]
8858       if be_new[constants.BE_AUTO_BALANCE]:
8859         # either we changed auto_balance to yes or it was from before
8860         mem_check_list.extend(instance.secondary_nodes)
8861       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8862                                                   instance.hypervisor)
8863       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8864                                          instance.hypervisor)
8865       pninfo = nodeinfo[pnode]
8866       msg = pninfo.fail_msg
8867       if msg:
8868         # Assume the primary node is unreachable and go ahead
8869         self.warn.append("Can't get info from primary node %s: %s" %
8870                          (pnode,  msg))
8871       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8872         self.warn.append("Node data from primary node %s doesn't contain"
8873                          " free memory information" % pnode)
8874       elif instance_info.fail_msg:
8875         self.warn.append("Can't get instance runtime information: %s" %
8876                         instance_info.fail_msg)
8877       else:
8878         if instance_info.payload:
8879           current_mem = int(instance_info.payload['memory'])
8880         else:
8881           # Assume instance not running
8882           # (there is a slight race condition here, but it's not very probable,
8883           # and we have no other way to check)
8884           current_mem = 0
8885         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8886                     pninfo.payload['memory_free'])
8887         if miss_mem > 0:
8888           raise errors.OpPrereqError("This change will prevent the instance"
8889                                      " from starting, due to %d MB of memory"
8890                                      " missing on its primary node" % miss_mem,
8891                                      errors.ECODE_NORES)
8892
8893       if be_new[constants.BE_AUTO_BALANCE]:
8894         for node, nres in nodeinfo.items():
8895           if node not in instance.secondary_nodes:
8896             continue
8897           msg = nres.fail_msg
8898           if msg:
8899             self.warn.append("Can't get info from secondary node %s: %s" %
8900                              (node, msg))
8901           elif not isinstance(nres.payload.get('memory_free', None), int):
8902             self.warn.append("Secondary node %s didn't return free"
8903                              " memory information" % node)
8904           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8905             self.warn.append("Not enough memory to failover instance to"
8906                              " secondary node %s" % node)
8907
8908     # NIC processing
8909     self.nic_pnew = {}
8910     self.nic_pinst = {}
8911     for nic_op, nic_dict in self.op.nics:
8912       if nic_op == constants.DDM_REMOVE:
8913         if not instance.nics:
8914           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8915                                      errors.ECODE_INVAL)
8916         continue
8917       if nic_op != constants.DDM_ADD:
8918         # an existing nic
8919         if not instance.nics:
8920           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8921                                      " no NICs" % nic_op,
8922                                      errors.ECODE_INVAL)
8923         if nic_op < 0 or nic_op >= len(instance.nics):
8924           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8925                                      " are 0 to %d" %
8926                                      (nic_op, len(instance.nics) - 1),
8927                                      errors.ECODE_INVAL)
8928         old_nic_params = instance.nics[nic_op].nicparams
8929         old_nic_ip = instance.nics[nic_op].ip
8930       else:
8931         old_nic_params = {}
8932         old_nic_ip = None
8933
8934       update_params_dict = dict([(key, nic_dict[key])
8935                                  for key in constants.NICS_PARAMETERS
8936                                  if key in nic_dict])
8937
8938       if 'bridge' in nic_dict:
8939         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8940
8941       new_nic_params = _GetUpdatedParams(old_nic_params,
8942                                          update_params_dict)
8943       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8944       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8945       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8946       self.nic_pinst[nic_op] = new_nic_params
8947       self.nic_pnew[nic_op] = new_filled_nic_params
8948       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8949
8950       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8951         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8952         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8953         if msg:
8954           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8955           if self.op.force:
8956             self.warn.append(msg)
8957           else:
8958             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8959       if new_nic_mode == constants.NIC_MODE_ROUTED:
8960         if 'ip' in nic_dict:
8961           nic_ip = nic_dict['ip']
8962         else:
8963           nic_ip = old_nic_ip
8964         if nic_ip is None:
8965           raise errors.OpPrereqError('Cannot set the nic ip to None'
8966                                      ' on a routed nic', errors.ECODE_INVAL)
8967       if 'mac' in nic_dict:
8968         nic_mac = nic_dict['mac']
8969         if nic_mac is None:
8970           raise errors.OpPrereqError('Cannot set the nic mac to None',
8971                                      errors.ECODE_INVAL)
8972         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8973           # otherwise generate the mac
8974           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8975         else:
8976           # or validate/reserve the current one
8977           try:
8978             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8979           except errors.ReservationError:
8980             raise errors.OpPrereqError("MAC address %s already in use"
8981                                        " in cluster" % nic_mac,
8982                                        errors.ECODE_NOTUNIQUE)
8983
8984     # DISK processing
8985     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8986       raise errors.OpPrereqError("Disk operations not supported for"
8987                                  " diskless instances",
8988                                  errors.ECODE_INVAL)
8989     for disk_op, _ in self.op.disks:
8990       if disk_op == constants.DDM_REMOVE:
8991         if len(instance.disks) == 1:
8992           raise errors.OpPrereqError("Cannot remove the last disk of"
8993                                      " an instance", errors.ECODE_INVAL)
8994         _CheckInstanceDown(self, instance, "cannot remove disks")
8995
8996       if (disk_op == constants.DDM_ADD and
8997           len(instance.nics) >= constants.MAX_DISKS):
8998         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8999                                    " add more" % constants.MAX_DISKS,
9000                                    errors.ECODE_STATE)
9001       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9002         # an existing disk
9003         if disk_op < 0 or disk_op >= len(instance.disks):
9004           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9005                                      " are 0 to %d" %
9006                                      (disk_op, len(instance.disks)),
9007                                      errors.ECODE_INVAL)
9008
9009     return
9010
9011   def _ConvertPlainToDrbd(self, feedback_fn):
9012     """Converts an instance from plain to drbd.
9013
9014     """
9015     feedback_fn("Converting template to drbd")
9016     instance = self.instance
9017     pnode = instance.primary_node
9018     snode = self.op.remote_node
9019
9020     # create a fake disk info for _GenerateDiskTemplate
9021     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9022     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9023                                       instance.name, pnode, [snode],
9024                                       disk_info, None, None, 0)
9025     info = _GetInstanceInfoText(instance)
9026     feedback_fn("Creating aditional volumes...")
9027     # first, create the missing data and meta devices
9028     for disk in new_disks:
9029       # unfortunately this is... not too nice
9030       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9031                             info, True)
9032       for child in disk.children:
9033         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9034     # at this stage, all new LVs have been created, we can rename the
9035     # old ones
9036     feedback_fn("Renaming original volumes...")
9037     rename_list = [(o, n.children[0].logical_id)
9038                    for (o, n) in zip(instance.disks, new_disks)]
9039     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9040     result.Raise("Failed to rename original LVs")
9041
9042     feedback_fn("Initializing DRBD devices...")
9043     # all child devices are in place, we can now create the DRBD devices
9044     for disk in new_disks:
9045       for node in [pnode, snode]:
9046         f_create = node == pnode
9047         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9048
9049     # at this point, the instance has been modified
9050     instance.disk_template = constants.DT_DRBD8
9051     instance.disks = new_disks
9052     self.cfg.Update(instance, feedback_fn)
9053
9054     # disks are created, waiting for sync
9055     disk_abort = not _WaitForSync(self, instance)
9056     if disk_abort:
9057       raise errors.OpExecError("There are some degraded disks for"
9058                                " this instance, please cleanup manually")
9059
9060   def _ConvertDrbdToPlain(self, feedback_fn):
9061     """Converts an instance from drbd to plain.
9062
9063     """
9064     instance = self.instance
9065     assert len(instance.secondary_nodes) == 1
9066     pnode = instance.primary_node
9067     snode = instance.secondary_nodes[0]
9068     feedback_fn("Converting template to plain")
9069
9070     old_disks = instance.disks
9071     new_disks = [d.children[0] for d in old_disks]
9072
9073     # copy over size and mode
9074     for parent, child in zip(old_disks, new_disks):
9075       child.size = parent.size
9076       child.mode = parent.mode
9077
9078     # update instance structure
9079     instance.disks = new_disks
9080     instance.disk_template = constants.DT_PLAIN
9081     self.cfg.Update(instance, feedback_fn)
9082
9083     feedback_fn("Removing volumes on the secondary node...")
9084     for disk in old_disks:
9085       self.cfg.SetDiskID(disk, snode)
9086       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9087       if msg:
9088         self.LogWarning("Could not remove block device %s on node %s,"
9089                         " continuing anyway: %s", disk.iv_name, snode, msg)
9090
9091     feedback_fn("Removing unneeded volumes on the primary node...")
9092     for idx, disk in enumerate(old_disks):
9093       meta = disk.children[1]
9094       self.cfg.SetDiskID(meta, pnode)
9095       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9096       if msg:
9097         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9098                         " continuing anyway: %s", idx, pnode, msg)
9099
9100
9101   def Exec(self, feedback_fn):
9102     """Modifies an instance.
9103
9104     All parameters take effect only at the next restart of the instance.
9105
9106     """
9107     # Process here the warnings from CheckPrereq, as we don't have a
9108     # feedback_fn there.
9109     for warn in self.warn:
9110       feedback_fn("WARNING: %s" % warn)
9111
9112     result = []
9113     instance = self.instance
9114     # disk changes
9115     for disk_op, disk_dict in self.op.disks:
9116       if disk_op == constants.DDM_REMOVE:
9117         # remove the last disk
9118         device = instance.disks.pop()
9119         device_idx = len(instance.disks)
9120         for node, disk in device.ComputeNodeTree(instance.primary_node):
9121           self.cfg.SetDiskID(disk, node)
9122           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9123           if msg:
9124             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9125                             " continuing anyway", device_idx, node, msg)
9126         result.append(("disk/%d" % device_idx, "remove"))
9127       elif disk_op == constants.DDM_ADD:
9128         # add a new disk
9129         if instance.disk_template == constants.DT_FILE:
9130           file_driver, file_path = instance.disks[0].logical_id
9131           file_path = os.path.dirname(file_path)
9132         else:
9133           file_driver = file_path = None
9134         disk_idx_base = len(instance.disks)
9135         new_disk = _GenerateDiskTemplate(self,
9136                                          instance.disk_template,
9137                                          instance.name, instance.primary_node,
9138                                          instance.secondary_nodes,
9139                                          [disk_dict],
9140                                          file_path,
9141                                          file_driver,
9142                                          disk_idx_base)[0]
9143         instance.disks.append(new_disk)
9144         info = _GetInstanceInfoText(instance)
9145
9146         logging.info("Creating volume %s for instance %s",
9147                      new_disk.iv_name, instance.name)
9148         # Note: this needs to be kept in sync with _CreateDisks
9149         #HARDCODE
9150         for node in instance.all_nodes:
9151           f_create = node == instance.primary_node
9152           try:
9153             _CreateBlockDev(self, node, instance, new_disk,
9154                             f_create, info, f_create)
9155           except errors.OpExecError, err:
9156             self.LogWarning("Failed to create volume %s (%s) on"
9157                             " node %s: %s",
9158                             new_disk.iv_name, new_disk, node, err)
9159         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9160                        (new_disk.size, new_disk.mode)))
9161       else:
9162         # change a given disk
9163         instance.disks[disk_op].mode = disk_dict['mode']
9164         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9165
9166     if self.op.disk_template:
9167       r_shut = _ShutdownInstanceDisks(self, instance)
9168       if not r_shut:
9169         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9170                                  " proceed with disk template conversion")
9171       mode = (instance.disk_template, self.op.disk_template)
9172       try:
9173         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9174       except:
9175         self.cfg.ReleaseDRBDMinors(instance.name)
9176         raise
9177       result.append(("disk_template", self.op.disk_template))
9178
9179     # NIC changes
9180     for nic_op, nic_dict in self.op.nics:
9181       if nic_op == constants.DDM_REMOVE:
9182         # remove the last nic
9183         del instance.nics[-1]
9184         result.append(("nic.%d" % len(instance.nics), "remove"))
9185       elif nic_op == constants.DDM_ADD:
9186         # mac and bridge should be set, by now
9187         mac = nic_dict['mac']
9188         ip = nic_dict.get('ip', None)
9189         nicparams = self.nic_pinst[constants.DDM_ADD]
9190         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9191         instance.nics.append(new_nic)
9192         result.append(("nic.%d" % (len(instance.nics) - 1),
9193                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9194                        (new_nic.mac, new_nic.ip,
9195                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9196                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9197                        )))
9198       else:
9199         for key in 'mac', 'ip':
9200           if key in nic_dict:
9201             setattr(instance.nics[nic_op], key, nic_dict[key])
9202         if nic_op in self.nic_pinst:
9203           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9204         for key, val in nic_dict.iteritems():
9205           result.append(("nic.%s/%d" % (key, nic_op), val))
9206
9207     # hvparams changes
9208     if self.op.hvparams:
9209       instance.hvparams = self.hv_inst
9210       for key, val in self.op.hvparams.iteritems():
9211         result.append(("hv/%s" % key, val))
9212
9213     # beparams changes
9214     if self.op.beparams:
9215       instance.beparams = self.be_inst
9216       for key, val in self.op.beparams.iteritems():
9217         result.append(("be/%s" % key, val))
9218
9219     # OS change
9220     if self.op.os_name:
9221       instance.os = self.op.os_name
9222
9223     # osparams changes
9224     if self.op.osparams:
9225       instance.osparams = self.os_inst
9226       for key, val in self.op.osparams.iteritems():
9227         result.append(("os/%s" % key, val))
9228
9229     self.cfg.Update(instance, feedback_fn)
9230
9231     return result
9232
9233   _DISK_CONVERSIONS = {
9234     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9235     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9236     }
9237
9238
9239 class LUQueryExports(NoHooksLU):
9240   """Query the exports list
9241
9242   """
9243   _OP_PARAMS = [
9244     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9245     ("use_locking", False, _TBool),
9246     ]
9247   REQ_BGL = False
9248
9249   def ExpandNames(self):
9250     self.needed_locks = {}
9251     self.share_locks[locking.LEVEL_NODE] = 1
9252     if not self.op.nodes:
9253       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9254     else:
9255       self.needed_locks[locking.LEVEL_NODE] = \
9256         _GetWantedNodes(self, self.op.nodes)
9257
9258   def Exec(self, feedback_fn):
9259     """Compute the list of all the exported system images.
9260
9261     @rtype: dict
9262     @return: a dictionary with the structure node->(export-list)
9263         where export-list is a list of the instances exported on
9264         that node.
9265
9266     """
9267     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9268     rpcresult = self.rpc.call_export_list(self.nodes)
9269     result = {}
9270     for node in rpcresult:
9271       if rpcresult[node].fail_msg:
9272         result[node] = False
9273       else:
9274         result[node] = rpcresult[node].payload
9275
9276     return result
9277
9278
9279 class LUPrepareExport(NoHooksLU):
9280   """Prepares an instance for an export and returns useful information.
9281
9282   """
9283   _OP_PARAMS = [
9284     _PInstanceName,
9285     ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9286     ]
9287   REQ_BGL = False
9288
9289   def ExpandNames(self):
9290     self._ExpandAndLockInstance()
9291
9292   def CheckPrereq(self):
9293     """Check prerequisites.
9294
9295     """
9296     instance_name = self.op.instance_name
9297
9298     self.instance = self.cfg.GetInstanceInfo(instance_name)
9299     assert self.instance is not None, \
9300           "Cannot retrieve locked instance %s" % self.op.instance_name
9301     _CheckNodeOnline(self, self.instance.primary_node)
9302
9303     self._cds = _GetClusterDomainSecret()
9304
9305   def Exec(self, feedback_fn):
9306     """Prepares an instance for an export.
9307
9308     """
9309     instance = self.instance
9310
9311     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9312       salt = utils.GenerateSecret(8)
9313
9314       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9315       result = self.rpc.call_x509_cert_create(instance.primary_node,
9316                                               constants.RIE_CERT_VALIDITY)
9317       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9318
9319       (name, cert_pem) = result.payload
9320
9321       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9322                                              cert_pem)
9323
9324       return {
9325         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9326         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9327                           salt),
9328         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9329         }
9330
9331     return None
9332
9333
9334 class LUExportInstance(LogicalUnit):
9335   """Export an instance to an image in the cluster.
9336
9337   """
9338   HPATH = "instance-export"
9339   HTYPE = constants.HTYPE_INSTANCE
9340   _OP_PARAMS = [
9341     _PInstanceName,
9342     ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9343     ("shutdown", True, _TBool),
9344     _PShutdownTimeout,
9345     ("remove_instance", False, _TBool),
9346     ("ignore_remove_failures", False, _TBool),
9347     ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9348     ("x509_key_name", None, _TOr(_TList, _TNone)),
9349     ("destination_x509_ca", None, _TMaybeString),
9350     ]
9351   REQ_BGL = False
9352
9353   def CheckArguments(self):
9354     """Check the arguments.
9355
9356     """
9357     self.x509_key_name = self.op.x509_key_name
9358     self.dest_x509_ca_pem = self.op.destination_x509_ca
9359
9360     if self.op.remove_instance and not self.op.shutdown:
9361       raise errors.OpPrereqError("Can not remove instance without shutting it"
9362                                  " down before")
9363
9364     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9365       if not self.x509_key_name:
9366         raise errors.OpPrereqError("Missing X509 key name for encryption",
9367                                    errors.ECODE_INVAL)
9368
9369       if not self.dest_x509_ca_pem:
9370         raise errors.OpPrereqError("Missing destination X509 CA",
9371                                    errors.ECODE_INVAL)
9372
9373   def ExpandNames(self):
9374     self._ExpandAndLockInstance()
9375
9376     # Lock all nodes for local exports
9377     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9378       # FIXME: lock only instance primary and destination node
9379       #
9380       # Sad but true, for now we have do lock all nodes, as we don't know where
9381       # the previous export might be, and in this LU we search for it and
9382       # remove it from its current node. In the future we could fix this by:
9383       #  - making a tasklet to search (share-lock all), then create the
9384       #    new one, then one to remove, after
9385       #  - removing the removal operation altogether
9386       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9387
9388   def DeclareLocks(self, level):
9389     """Last minute lock declaration."""
9390     # All nodes are locked anyway, so nothing to do here.
9391
9392   def BuildHooksEnv(self):
9393     """Build hooks env.
9394
9395     This will run on the master, primary node and target node.
9396
9397     """
9398     env = {
9399       "EXPORT_MODE": self.op.mode,
9400       "EXPORT_NODE": self.op.target_node,
9401       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9402       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9403       # TODO: Generic function for boolean env variables
9404       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9405       }
9406
9407     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9408
9409     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9410
9411     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9412       nl.append(self.op.target_node)
9413
9414     return env, nl, nl
9415
9416   def CheckPrereq(self):
9417     """Check prerequisites.
9418
9419     This checks that the instance and node names are valid.
9420
9421     """
9422     instance_name = self.op.instance_name
9423
9424     self.instance = self.cfg.GetInstanceInfo(instance_name)
9425     assert self.instance is not None, \
9426           "Cannot retrieve locked instance %s" % self.op.instance_name
9427     _CheckNodeOnline(self, self.instance.primary_node)
9428
9429     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9430       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9431       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9432       assert self.dst_node is not None
9433
9434       _CheckNodeOnline(self, self.dst_node.name)
9435       _CheckNodeNotDrained(self, self.dst_node.name)
9436
9437       self._cds = None
9438       self.dest_disk_info = None
9439       self.dest_x509_ca = None
9440
9441     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9442       self.dst_node = None
9443
9444       if len(self.op.target_node) != len(self.instance.disks):
9445         raise errors.OpPrereqError(("Received destination information for %s"
9446                                     " disks, but instance %s has %s disks") %
9447                                    (len(self.op.target_node), instance_name,
9448                                     len(self.instance.disks)),
9449                                    errors.ECODE_INVAL)
9450
9451       cds = _GetClusterDomainSecret()
9452
9453       # Check X509 key name
9454       try:
9455         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9456       except (TypeError, ValueError), err:
9457         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9458
9459       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9460         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9461                                    errors.ECODE_INVAL)
9462
9463       # Load and verify CA
9464       try:
9465         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9466       except OpenSSL.crypto.Error, err:
9467         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9468                                    (err, ), errors.ECODE_INVAL)
9469
9470       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9471       if errcode is not None:
9472         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9473                                    (msg, ), errors.ECODE_INVAL)
9474
9475       self.dest_x509_ca = cert
9476
9477       # Verify target information
9478       disk_info = []
9479       for idx, disk_data in enumerate(self.op.target_node):
9480         try:
9481           (host, port, magic) = \
9482             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9483         except errors.GenericError, err:
9484           raise errors.OpPrereqError("Target info for disk %s: %s" %
9485                                      (idx, err), errors.ECODE_INVAL)
9486
9487         disk_info.append((host, port, magic))
9488
9489       assert len(disk_info) == len(self.op.target_node)
9490       self.dest_disk_info = disk_info
9491
9492     else:
9493       raise errors.ProgrammerError("Unhandled export mode %r" %
9494                                    self.op.mode)
9495
9496     # instance disk type verification
9497     # TODO: Implement export support for file-based disks
9498     for disk in self.instance.disks:
9499       if disk.dev_type == constants.LD_FILE:
9500         raise errors.OpPrereqError("Export not supported for instances with"
9501                                    " file-based disks", errors.ECODE_INVAL)
9502
9503   def _CleanupExports(self, feedback_fn):
9504     """Removes exports of current instance from all other nodes.
9505
9506     If an instance in a cluster with nodes A..D was exported to node C, its
9507     exports will be removed from the nodes A, B and D.
9508
9509     """
9510     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9511
9512     nodelist = self.cfg.GetNodeList()
9513     nodelist.remove(self.dst_node.name)
9514
9515     # on one-node clusters nodelist will be empty after the removal
9516     # if we proceed the backup would be removed because OpQueryExports
9517     # substitutes an empty list with the full cluster node list.
9518     iname = self.instance.name
9519     if nodelist:
9520       feedback_fn("Removing old exports for instance %s" % iname)
9521       exportlist = self.rpc.call_export_list(nodelist)
9522       for node in exportlist:
9523         if exportlist[node].fail_msg:
9524           continue
9525         if iname in exportlist[node].payload:
9526           msg = self.rpc.call_export_remove(node, iname).fail_msg
9527           if msg:
9528             self.LogWarning("Could not remove older export for instance %s"
9529                             " on node %s: %s", iname, node, msg)
9530
9531   def Exec(self, feedback_fn):
9532     """Export an instance to an image in the cluster.
9533
9534     """
9535     assert self.op.mode in constants.EXPORT_MODES
9536
9537     instance = self.instance
9538     src_node = instance.primary_node
9539
9540     if self.op.shutdown:
9541       # shutdown the instance, but not the disks
9542       feedback_fn("Shutting down instance %s" % instance.name)
9543       result = self.rpc.call_instance_shutdown(src_node, instance,
9544                                                self.op.shutdown_timeout)
9545       # TODO: Maybe ignore failures if ignore_remove_failures is set
9546       result.Raise("Could not shutdown instance %s on"
9547                    " node %s" % (instance.name, src_node))
9548
9549     # set the disks ID correctly since call_instance_start needs the
9550     # correct drbd minor to create the symlinks
9551     for disk in instance.disks:
9552       self.cfg.SetDiskID(disk, src_node)
9553
9554     activate_disks = (not instance.admin_up)
9555
9556     if activate_disks:
9557       # Activate the instance disks if we'exporting a stopped instance
9558       feedback_fn("Activating disks for %s" % instance.name)
9559       _StartInstanceDisks(self, instance, None)
9560
9561     try:
9562       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9563                                                      instance)
9564
9565       helper.CreateSnapshots()
9566       try:
9567         if (self.op.shutdown and instance.admin_up and
9568             not self.op.remove_instance):
9569           assert not activate_disks
9570           feedback_fn("Starting instance %s" % instance.name)
9571           result = self.rpc.call_instance_start(src_node, instance, None, None)
9572           msg = result.fail_msg
9573           if msg:
9574             feedback_fn("Failed to start instance: %s" % msg)
9575             _ShutdownInstanceDisks(self, instance)
9576             raise errors.OpExecError("Could not start instance: %s" % msg)
9577
9578         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9579           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9580         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9581           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9582           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9583
9584           (key_name, _, _) = self.x509_key_name
9585
9586           dest_ca_pem = \
9587             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9588                                             self.dest_x509_ca)
9589
9590           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9591                                                      key_name, dest_ca_pem,
9592                                                      timeouts)
9593       finally:
9594         helper.Cleanup()
9595
9596       # Check for backwards compatibility
9597       assert len(dresults) == len(instance.disks)
9598       assert compat.all(isinstance(i, bool) for i in dresults), \
9599              "Not all results are boolean: %r" % dresults
9600
9601     finally:
9602       if activate_disks:
9603         feedback_fn("Deactivating disks for %s" % instance.name)
9604         _ShutdownInstanceDisks(self, instance)
9605
9606     if not (compat.all(dresults) and fin_resu):
9607       failures = []
9608       if not fin_resu:
9609         failures.append("export finalization")
9610       if not compat.all(dresults):
9611         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9612                                if not dsk)
9613         failures.append("disk export: disk(s) %s" % fdsk)
9614
9615       raise errors.OpExecError("Export failed, errors in %s" %
9616                                utils.CommaJoin(failures))
9617
9618     # At this point, the export was successful, we can cleanup/finish
9619
9620     # Remove instance if requested
9621     if self.op.remove_instance:
9622       feedback_fn("Removing instance %s" % instance.name)
9623       _RemoveInstance(self, feedback_fn, instance,
9624                       self.op.ignore_remove_failures)
9625
9626     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9627       self._CleanupExports(feedback_fn)
9628
9629     return fin_resu, dresults
9630
9631
9632 class LURemoveExport(NoHooksLU):
9633   """Remove exports related to the named instance.
9634
9635   """
9636   _OP_PARAMS = [
9637     _PInstanceName,
9638     ]
9639   REQ_BGL = False
9640
9641   def ExpandNames(self):
9642     self.needed_locks = {}
9643     # We need all nodes to be locked in order for RemoveExport to work, but we
9644     # don't need to lock the instance itself, as nothing will happen to it (and
9645     # we can remove exports also for a removed instance)
9646     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9647
9648   def Exec(self, feedback_fn):
9649     """Remove any export.
9650
9651     """
9652     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9653     # If the instance was not found we'll try with the name that was passed in.
9654     # This will only work if it was an FQDN, though.
9655     fqdn_warn = False
9656     if not instance_name:
9657       fqdn_warn = True
9658       instance_name = self.op.instance_name
9659
9660     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9661     exportlist = self.rpc.call_export_list(locked_nodes)
9662     found = False
9663     for node in exportlist:
9664       msg = exportlist[node].fail_msg
9665       if msg:
9666         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9667         continue
9668       if instance_name in exportlist[node].payload:
9669         found = True
9670         result = self.rpc.call_export_remove(node, instance_name)
9671         msg = result.fail_msg
9672         if msg:
9673           logging.error("Could not remove export for instance %s"
9674                         " on node %s: %s", instance_name, node, msg)
9675
9676     if fqdn_warn and not found:
9677       feedback_fn("Export not found. If trying to remove an export belonging"
9678                   " to a deleted instance please use its Fully Qualified"
9679                   " Domain Name.")
9680
9681
9682 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9683   """Generic tags LU.
9684
9685   This is an abstract class which is the parent of all the other tags LUs.
9686
9687   """
9688
9689   def ExpandNames(self):
9690     self.needed_locks = {}
9691     if self.op.kind == constants.TAG_NODE:
9692       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9693       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9694     elif self.op.kind == constants.TAG_INSTANCE:
9695       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9696       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9697
9698   def CheckPrereq(self):
9699     """Check prerequisites.
9700
9701     """
9702     if self.op.kind == constants.TAG_CLUSTER:
9703       self.target = self.cfg.GetClusterInfo()
9704     elif self.op.kind == constants.TAG_NODE:
9705       self.target = self.cfg.GetNodeInfo(self.op.name)
9706     elif self.op.kind == constants.TAG_INSTANCE:
9707       self.target = self.cfg.GetInstanceInfo(self.op.name)
9708     else:
9709       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9710                                  str(self.op.kind), errors.ECODE_INVAL)
9711
9712
9713 class LUGetTags(TagsLU):
9714   """Returns the tags of a given object.
9715
9716   """
9717   _OP_PARAMS = [
9718     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9719     ("name", _NoDefault, _TNonEmptyString),
9720     ]
9721   REQ_BGL = False
9722
9723   def Exec(self, feedback_fn):
9724     """Returns the tag list.
9725
9726     """
9727     return list(self.target.GetTags())
9728
9729
9730 class LUSearchTags(NoHooksLU):
9731   """Searches the tags for a given pattern.
9732
9733   """
9734   _OP_PARAMS = [
9735     ("pattern", _NoDefault, _TNonEmptyString),
9736     ]
9737   REQ_BGL = False
9738
9739   def ExpandNames(self):
9740     self.needed_locks = {}
9741
9742   def CheckPrereq(self):
9743     """Check prerequisites.
9744
9745     This checks the pattern passed for validity by compiling it.
9746
9747     """
9748     try:
9749       self.re = re.compile(self.op.pattern)
9750     except re.error, err:
9751       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9752                                  (self.op.pattern, err), errors.ECODE_INVAL)
9753
9754   def Exec(self, feedback_fn):
9755     """Returns the tag list.
9756
9757     """
9758     cfg = self.cfg
9759     tgts = [("/cluster", cfg.GetClusterInfo())]
9760     ilist = cfg.GetAllInstancesInfo().values()
9761     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9762     nlist = cfg.GetAllNodesInfo().values()
9763     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9764     results = []
9765     for path, target in tgts:
9766       for tag in target.GetTags():
9767         if self.re.search(tag):
9768           results.append((path, tag))
9769     return results
9770
9771
9772 class LUAddTags(TagsLU):
9773   """Sets a tag on a given object.
9774
9775   """
9776   _OP_PARAMS = [
9777     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9778     ("name", _NoDefault, _TNonEmptyString),
9779     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9780     ]
9781   REQ_BGL = False
9782
9783   def CheckPrereq(self):
9784     """Check prerequisites.
9785
9786     This checks the type and length of the tag name and value.
9787
9788     """
9789     TagsLU.CheckPrereq(self)
9790     for tag in self.op.tags:
9791       objects.TaggableObject.ValidateTag(tag)
9792
9793   def Exec(self, feedback_fn):
9794     """Sets the tag.
9795
9796     """
9797     try:
9798       for tag in self.op.tags:
9799         self.target.AddTag(tag)
9800     except errors.TagError, err:
9801       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9802     self.cfg.Update(self.target, feedback_fn)
9803
9804
9805 class LUDelTags(TagsLU):
9806   """Delete a list of tags from a given object.
9807
9808   """
9809   _OP_PARAMS = [
9810     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9811     ("name", _NoDefault, _TNonEmptyString),
9812     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9813     ]
9814   REQ_BGL = False
9815
9816   def CheckPrereq(self):
9817     """Check prerequisites.
9818
9819     This checks that we have the given tag.
9820
9821     """
9822     TagsLU.CheckPrereq(self)
9823     for tag in self.op.tags:
9824       objects.TaggableObject.ValidateTag(tag)
9825     del_tags = frozenset(self.op.tags)
9826     cur_tags = self.target.GetTags()
9827     if not del_tags <= cur_tags:
9828       diff_tags = del_tags - cur_tags
9829       diff_names = ["'%s'" % tag for tag in diff_tags]
9830       diff_names.sort()
9831       raise errors.OpPrereqError("Tag(s) %s not found" %
9832                                  (",".join(diff_names)), errors.ECODE_NOENT)
9833
9834   def Exec(self, feedback_fn):
9835     """Remove the tag from the object.
9836
9837     """
9838     for tag in self.op.tags:
9839       self.target.RemoveTag(tag)
9840     self.cfg.Update(self.target, feedback_fn)
9841
9842
9843 class LUTestDelay(NoHooksLU):
9844   """Sleep for a specified amount of time.
9845
9846   This LU sleeps on the master and/or nodes for a specified amount of
9847   time.
9848
9849   """
9850   _OP_PARAMS = [
9851     ("duration", _NoDefault, _TFloat),
9852     ("on_master", True, _TBool),
9853     ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9854     ("repeat", 0, _TPositiveInt)
9855     ]
9856   REQ_BGL = False
9857
9858   def ExpandNames(self):
9859     """Expand names and set required locks.
9860
9861     This expands the node list, if any.
9862
9863     """
9864     self.needed_locks = {}
9865     if self.op.on_nodes:
9866       # _GetWantedNodes can be used here, but is not always appropriate to use
9867       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9868       # more information.
9869       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9870       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9871
9872   def _TestDelay(self):
9873     """Do the actual sleep.
9874
9875     """
9876     if self.op.on_master:
9877       if not utils.TestDelay(self.op.duration):
9878         raise errors.OpExecError("Error during master delay test")
9879     if self.op.on_nodes:
9880       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9881       for node, node_result in result.items():
9882         node_result.Raise("Failure during rpc call to node %s" % node)
9883
9884   def Exec(self, feedback_fn):
9885     """Execute the test delay opcode, with the wanted repetitions.
9886
9887     """
9888     if self.op.repeat == 0:
9889       self._TestDelay()
9890     else:
9891       top_value = self.op.repeat - 1
9892       for i in range(self.op.repeat):
9893         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9894         self._TestDelay()
9895
9896
9897 class LUTestJobqueue(NoHooksLU):
9898   """Utility LU to test some aspects of the job queue.
9899
9900   """
9901   _OP_PARAMS = [
9902     ("notify_waitlock", False, _TBool),
9903     ("notify_exec", False, _TBool),
9904     ("log_messages", _EmptyList, _TListOf(_TString)),
9905     ("fail", False, _TBool),
9906     ]
9907   REQ_BGL = False
9908
9909   # Must be lower than default timeout for WaitForJobChange to see whether it
9910   # notices changed jobs
9911   _CLIENT_CONNECT_TIMEOUT = 20.0
9912   _CLIENT_CONFIRM_TIMEOUT = 60.0
9913
9914   @classmethod
9915   def _NotifyUsingSocket(cls, cb, errcls):
9916     """Opens a Unix socket and waits for another program to connect.
9917
9918     @type cb: callable
9919     @param cb: Callback to send socket name to client
9920     @type errcls: class
9921     @param errcls: Exception class to use for errors
9922
9923     """
9924     # Using a temporary directory as there's no easy way to create temporary
9925     # sockets without writing a custom loop around tempfile.mktemp and
9926     # socket.bind
9927     tmpdir = tempfile.mkdtemp()
9928     try:
9929       tmpsock = utils.PathJoin(tmpdir, "sock")
9930
9931       logging.debug("Creating temporary socket at %s", tmpsock)
9932       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
9933       try:
9934         sock.bind(tmpsock)
9935         sock.listen(1)
9936
9937         # Send details to client
9938         cb(tmpsock)
9939
9940         # Wait for client to connect before continuing
9941         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
9942         try:
9943           (conn, _) = sock.accept()
9944         except socket.error, err:
9945           raise errcls("Client didn't connect in time (%s)" % err)
9946       finally:
9947         sock.close()
9948     finally:
9949       # Remove as soon as client is connected
9950       shutil.rmtree(tmpdir)
9951
9952     # Wait for client to close
9953     try:
9954       try:
9955         # pylint: disable-msg=E1101
9956         # Instance of '_socketobject' has no ... member
9957         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
9958         conn.recv(1)
9959       except socket.error, err:
9960         raise errcls("Client failed to confirm notification (%s)" % err)
9961     finally:
9962       conn.close()
9963
9964   def _SendNotification(self, test, arg, sockname):
9965     """Sends a notification to the client.
9966
9967     @type test: string
9968     @param test: Test name
9969     @param arg: Test argument (depends on test)
9970     @type sockname: string
9971     @param sockname: Socket path
9972
9973     """
9974     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
9975
9976   def _Notify(self, prereq, test, arg):
9977     """Notifies the client of a test.
9978
9979     @type prereq: bool
9980     @param prereq: Whether this is a prereq-phase test
9981     @type test: string
9982     @param test: Test name
9983     @param arg: Test argument (depends on test)
9984
9985     """
9986     if prereq:
9987       errcls = errors.OpPrereqError
9988     else:
9989       errcls = errors.OpExecError
9990
9991     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
9992                                                   test, arg),
9993                                    errcls)
9994
9995   def CheckArguments(self):
9996     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
9997     self.expandnames_calls = 0
9998
9999   def ExpandNames(self):
10000     checkargs_calls = getattr(self, "checkargs_calls", 0)
10001     if checkargs_calls < 1:
10002       raise errors.ProgrammerError("CheckArguments was not called")
10003
10004     self.expandnames_calls += 1
10005
10006     if self.op.notify_waitlock:
10007       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10008
10009     self.LogInfo("Expanding names")
10010
10011     # Get lock on master node (just to get a lock, not for a particular reason)
10012     self.needed_locks = {
10013       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10014       }
10015
10016   def Exec(self, feedback_fn):
10017     if self.expandnames_calls < 1:
10018       raise errors.ProgrammerError("ExpandNames was not called")
10019
10020     if self.op.notify_exec:
10021       self._Notify(False, constants.JQT_EXEC, None)
10022
10023     self.LogInfo("Executing")
10024
10025     if self.op.log_messages:
10026       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10027       for idx, msg in enumerate(self.op.log_messages):
10028         self.LogInfo("Sending log message %s", idx + 1)
10029         feedback_fn(constants.JQT_MSGPREFIX + msg)
10030         # Report how many test messages have been sent
10031         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10032
10033     if self.op.fail:
10034       raise errors.OpExecError("Opcode failure was requested")
10035
10036     return True
10037
10038
10039 class IAllocator(object):
10040   """IAllocator framework.
10041
10042   An IAllocator instance has three sets of attributes:
10043     - cfg that is needed to query the cluster
10044     - input data (all members of the _KEYS class attribute are required)
10045     - four buffer attributes (in|out_data|text), that represent the
10046       input (to the external script) in text and data structure format,
10047       and the output from it, again in two formats
10048     - the result variables from the script (success, info, nodes) for
10049       easy usage
10050
10051   """
10052   # pylint: disable-msg=R0902
10053   # lots of instance attributes
10054   _ALLO_KEYS = [
10055     "name", "mem_size", "disks", "disk_template",
10056     "os", "tags", "nics", "vcpus", "hypervisor",
10057     ]
10058   _RELO_KEYS = [
10059     "name", "relocate_from",
10060     ]
10061   _EVAC_KEYS = [
10062     "evac_nodes",
10063     ]
10064
10065   def __init__(self, cfg, rpc, mode, **kwargs):
10066     self.cfg = cfg
10067     self.rpc = rpc
10068     # init buffer variables
10069     self.in_text = self.out_text = self.in_data = self.out_data = None
10070     # init all input fields so that pylint is happy
10071     self.mode = mode
10072     self.mem_size = self.disks = self.disk_template = None
10073     self.os = self.tags = self.nics = self.vcpus = None
10074     self.hypervisor = None
10075     self.relocate_from = None
10076     self.name = None
10077     self.evac_nodes = None
10078     # computed fields
10079     self.required_nodes = None
10080     # init result fields
10081     self.success = self.info = self.result = None
10082     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10083       keyset = self._ALLO_KEYS
10084       fn = self._AddNewInstance
10085     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10086       keyset = self._RELO_KEYS
10087       fn = self._AddRelocateInstance
10088     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10089       keyset = self._EVAC_KEYS
10090       fn = self._AddEvacuateNodes
10091     else:
10092       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10093                                    " IAllocator" % self.mode)
10094     for key in kwargs:
10095       if key not in keyset:
10096         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10097                                      " IAllocator" % key)
10098       setattr(self, key, kwargs[key])
10099
10100     for key in keyset:
10101       if key not in kwargs:
10102         raise errors.ProgrammerError("Missing input parameter '%s' to"
10103                                      " IAllocator" % key)
10104     self._BuildInputData(fn)
10105
10106   def _ComputeClusterData(self):
10107     """Compute the generic allocator input data.
10108
10109     This is the data that is independent of the actual operation.
10110
10111     """
10112     cfg = self.cfg
10113     cluster_info = cfg.GetClusterInfo()
10114     # cluster data
10115     data = {
10116       "version": constants.IALLOCATOR_VERSION,
10117       "cluster_name": cfg.GetClusterName(),
10118       "cluster_tags": list(cluster_info.GetTags()),
10119       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10120       # we don't have job IDs
10121       }
10122     iinfo = cfg.GetAllInstancesInfo().values()
10123     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10124
10125     # node data
10126     node_results = {}
10127     node_list = cfg.GetNodeList()
10128
10129     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10130       hypervisor_name = self.hypervisor
10131     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10132       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10133     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10134       hypervisor_name = cluster_info.enabled_hypervisors[0]
10135
10136     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10137                                         hypervisor_name)
10138     node_iinfo = \
10139       self.rpc.call_all_instances_info(node_list,
10140                                        cluster_info.enabled_hypervisors)
10141     for nname, nresult in node_data.items():
10142       # first fill in static (config-based) values
10143       ninfo = cfg.GetNodeInfo(nname)
10144       pnr = {
10145         "tags": list(ninfo.GetTags()),
10146         "primary_ip": ninfo.primary_ip,
10147         "secondary_ip": ninfo.secondary_ip,
10148         "offline": ninfo.offline,
10149         "drained": ninfo.drained,
10150         "master_candidate": ninfo.master_candidate,
10151         }
10152
10153       if not (ninfo.offline or ninfo.drained):
10154         nresult.Raise("Can't get data for node %s" % nname)
10155         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10156                                 nname)
10157         remote_info = nresult.payload
10158
10159         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10160                      'vg_size', 'vg_free', 'cpu_total']:
10161           if attr not in remote_info:
10162             raise errors.OpExecError("Node '%s' didn't return attribute"
10163                                      " '%s'" % (nname, attr))
10164           if not isinstance(remote_info[attr], int):
10165             raise errors.OpExecError("Node '%s' returned invalid value"
10166                                      " for '%s': %s" %
10167                                      (nname, attr, remote_info[attr]))
10168         # compute memory used by primary instances
10169         i_p_mem = i_p_up_mem = 0
10170         for iinfo, beinfo in i_list:
10171           if iinfo.primary_node == nname:
10172             i_p_mem += beinfo[constants.BE_MEMORY]
10173             if iinfo.name not in node_iinfo[nname].payload:
10174               i_used_mem = 0
10175             else:
10176               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10177             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10178             remote_info['memory_free'] -= max(0, i_mem_diff)
10179
10180             if iinfo.admin_up:
10181               i_p_up_mem += beinfo[constants.BE_MEMORY]
10182
10183         # compute memory used by instances
10184         pnr_dyn = {
10185           "total_memory": remote_info['memory_total'],
10186           "reserved_memory": remote_info['memory_dom0'],
10187           "free_memory": remote_info['memory_free'],
10188           "total_disk": remote_info['vg_size'],
10189           "free_disk": remote_info['vg_free'],
10190           "total_cpus": remote_info['cpu_total'],
10191           "i_pri_memory": i_p_mem,
10192           "i_pri_up_memory": i_p_up_mem,
10193           }
10194         pnr.update(pnr_dyn)
10195
10196       node_results[nname] = pnr
10197     data["nodes"] = node_results
10198
10199     # instance data
10200     instance_data = {}
10201     for iinfo, beinfo in i_list:
10202       nic_data = []
10203       for nic in iinfo.nics:
10204         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10205         nic_dict = {"mac": nic.mac,
10206                     "ip": nic.ip,
10207                     "mode": filled_params[constants.NIC_MODE],
10208                     "link": filled_params[constants.NIC_LINK],
10209                    }
10210         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10211           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10212         nic_data.append(nic_dict)
10213       pir = {
10214         "tags": list(iinfo.GetTags()),
10215         "admin_up": iinfo.admin_up,
10216         "vcpus": beinfo[constants.BE_VCPUS],
10217         "memory": beinfo[constants.BE_MEMORY],
10218         "os": iinfo.os,
10219         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10220         "nics": nic_data,
10221         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10222         "disk_template": iinfo.disk_template,
10223         "hypervisor": iinfo.hypervisor,
10224         }
10225       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10226                                                  pir["disks"])
10227       instance_data[iinfo.name] = pir
10228
10229     data["instances"] = instance_data
10230
10231     self.in_data = data
10232
10233   def _AddNewInstance(self):
10234     """Add new instance data to allocator structure.
10235
10236     This in combination with _AllocatorGetClusterData will create the
10237     correct structure needed as input for the allocator.
10238
10239     The checks for the completeness of the opcode must have already been
10240     done.
10241
10242     """
10243     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10244
10245     if self.disk_template in constants.DTS_NET_MIRROR:
10246       self.required_nodes = 2
10247     else:
10248       self.required_nodes = 1
10249     request = {
10250       "name": self.name,
10251       "disk_template": self.disk_template,
10252       "tags": self.tags,
10253       "os": self.os,
10254       "vcpus": self.vcpus,
10255       "memory": self.mem_size,
10256       "disks": self.disks,
10257       "disk_space_total": disk_space,
10258       "nics": self.nics,
10259       "required_nodes": self.required_nodes,
10260       }
10261     return request
10262
10263   def _AddRelocateInstance(self):
10264     """Add relocate instance data to allocator structure.
10265
10266     This in combination with _IAllocatorGetClusterData will create the
10267     correct structure needed as input for the allocator.
10268
10269     The checks for the completeness of the opcode must have already been
10270     done.
10271
10272     """
10273     instance = self.cfg.GetInstanceInfo(self.name)
10274     if instance is None:
10275       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10276                                    " IAllocator" % self.name)
10277
10278     if instance.disk_template not in constants.DTS_NET_MIRROR:
10279       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10280                                  errors.ECODE_INVAL)
10281
10282     if len(instance.secondary_nodes) != 1:
10283       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10284                                  errors.ECODE_STATE)
10285
10286     self.required_nodes = 1
10287     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10288     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10289
10290     request = {
10291       "name": self.name,
10292       "disk_space_total": disk_space,
10293       "required_nodes": self.required_nodes,
10294       "relocate_from": self.relocate_from,
10295       }
10296     return request
10297
10298   def _AddEvacuateNodes(self):
10299     """Add evacuate nodes data to allocator structure.
10300
10301     """
10302     request = {
10303       "evac_nodes": self.evac_nodes
10304       }
10305     return request
10306
10307   def _BuildInputData(self, fn):
10308     """Build input data structures.
10309
10310     """
10311     self._ComputeClusterData()
10312
10313     request = fn()
10314     request["type"] = self.mode
10315     self.in_data["request"] = request
10316
10317     self.in_text = serializer.Dump(self.in_data)
10318
10319   def Run(self, name, validate=True, call_fn=None):
10320     """Run an instance allocator and return the results.
10321
10322     """
10323     if call_fn is None:
10324       call_fn = self.rpc.call_iallocator_runner
10325
10326     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10327     result.Raise("Failure while running the iallocator script")
10328
10329     self.out_text = result.payload
10330     if validate:
10331       self._ValidateResult()
10332
10333   def _ValidateResult(self):
10334     """Process the allocator results.
10335
10336     This will process and if successful save the result in
10337     self.out_data and the other parameters.
10338
10339     """
10340     try:
10341       rdict = serializer.Load(self.out_text)
10342     except Exception, err:
10343       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10344
10345     if not isinstance(rdict, dict):
10346       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10347
10348     # TODO: remove backwards compatiblity in later versions
10349     if "nodes" in rdict and "result" not in rdict:
10350       rdict["result"] = rdict["nodes"]
10351       del rdict["nodes"]
10352
10353     for key in "success", "info", "result":
10354       if key not in rdict:
10355         raise errors.OpExecError("Can't parse iallocator results:"
10356                                  " missing key '%s'" % key)
10357       setattr(self, key, rdict[key])
10358
10359     if not isinstance(rdict["result"], list):
10360       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10361                                " is not a list")
10362     self.out_data = rdict
10363
10364
10365 class LUTestAllocator(NoHooksLU):
10366   """Run allocator tests.
10367
10368   This LU runs the allocator tests
10369
10370   """
10371   _OP_PARAMS = [
10372     ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10373     ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10374     ("name", _NoDefault, _TNonEmptyString),
10375     ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10376       _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10377                _TOr(_TNone, _TNonEmptyString))))),
10378     ("disks", _NoDefault, _TOr(_TNone, _TList)),
10379     ("hypervisor", None, _TMaybeString),
10380     ("allocator", None, _TMaybeString),
10381     ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10382     ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10383     ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10384     ("os", None, _TMaybeString),
10385     ("disk_template", None, _TMaybeString),
10386     ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10387     ]
10388
10389   def CheckPrereq(self):
10390     """Check prerequisites.
10391
10392     This checks the opcode parameters depending on the director and mode test.
10393
10394     """
10395     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10396       for attr in ["mem_size", "disks", "disk_template",
10397                    "os", "tags", "nics", "vcpus"]:
10398         if not hasattr(self.op, attr):
10399           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10400                                      attr, errors.ECODE_INVAL)
10401       iname = self.cfg.ExpandInstanceName(self.op.name)
10402       if iname is not None:
10403         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10404                                    iname, errors.ECODE_EXISTS)
10405       if not isinstance(self.op.nics, list):
10406         raise errors.OpPrereqError("Invalid parameter 'nics'",
10407                                    errors.ECODE_INVAL)
10408       if not isinstance(self.op.disks, list):
10409         raise errors.OpPrereqError("Invalid parameter 'disks'",
10410                                    errors.ECODE_INVAL)
10411       for row in self.op.disks:
10412         if (not isinstance(row, dict) or
10413             "size" not in row or
10414             not isinstance(row["size"], int) or
10415             "mode" not in row or
10416             row["mode"] not in ['r', 'w']):
10417           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10418                                      " parameter", errors.ECODE_INVAL)
10419       if self.op.hypervisor is None:
10420         self.op.hypervisor = self.cfg.GetHypervisorType()
10421     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10422       fname = _ExpandInstanceName(self.cfg, self.op.name)
10423       self.op.name = fname
10424       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10425     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10426       if not hasattr(self.op, "evac_nodes"):
10427         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10428                                    " opcode input", errors.ECODE_INVAL)
10429     else:
10430       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10431                                  self.op.mode, errors.ECODE_INVAL)
10432
10433     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10434       if self.op.allocator is None:
10435         raise errors.OpPrereqError("Missing allocator name",
10436                                    errors.ECODE_INVAL)
10437     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10438       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10439                                  self.op.direction, errors.ECODE_INVAL)
10440
10441   def Exec(self, feedback_fn):
10442     """Run the allocator test.
10443
10444     """
10445     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10446       ial = IAllocator(self.cfg, self.rpc,
10447                        mode=self.op.mode,
10448                        name=self.op.name,
10449                        mem_size=self.op.mem_size,
10450                        disks=self.op.disks,
10451                        disk_template=self.op.disk_template,
10452                        os=self.op.os,
10453                        tags=self.op.tags,
10454                        nics=self.op.nics,
10455                        vcpus=self.op.vcpus,
10456                        hypervisor=self.op.hypervisor,
10457                        )
10458     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10459       ial = IAllocator(self.cfg, self.rpc,
10460                        mode=self.op.mode,
10461                        name=self.op.name,
10462                        relocate_from=list(self.relocate_from),
10463                        )
10464     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10465       ial = IAllocator(self.cfg, self.rpc,
10466                        mode=self.op.mode,
10467                        evac_nodes=self.op.evac_nodes)
10468     else:
10469       raise errors.ProgrammerError("Uncatched mode %s in"
10470                                    " LUTestAllocator.Exec", self.op.mode)
10471
10472     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10473       result = ial.in_text
10474     else:
10475       ial.Run(self.op.allocator, validate=False)
10476       result = ial.out_text
10477     return result