code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56
  57 import ganeti.masterd.instance # pylint: disable-msg=W0611
  58
  59
  60 # Modifiable default values; need to define these here before the
  61 # actual LUs
  62
  63 def _EmptyList():
  64   """Returns an empty list.
  65
  66   """
  67   return []
  68
  69
  70 def _EmptyDict():
  71   """Returns an empty dict.
  72
  73   """
  74   return {}
  75
  76
  77 #: The without-default default value
  78 _NoDefault = object()
  79
  80
  81 #: The no-type (value to complex to check it in the type system)
  82 _NoType = object()
  83
  84
  85 # Some basic types
  86 def _TNotNone(val):
  87   """Checks if the given value is not None.
  88
  89   """
  90   return val is not None
  91
  92
  93 def _TNone(val):
  94   """Checks if the given value is None.
  95
  96   """
  97   return val is None
  98
  99
 100 def _TBool(val):
 101   """Checks if the given value is a boolean.
 102
 103   """
 104   return isinstance(val, bool)
 105
 106
 107 def _TInt(val):
 108   """Checks if the given value is an integer.
 109
 110   """
 111   return isinstance(val, int)
 112
 113
 114 def _TFloat(val):
 115   """Checks if the given value is a float.
 116
 117   """
 118   return isinstance(val, float)
 119
 120
 121 def _TString(val):
 122   """Checks if the given value is a string.
 123
 124   """
 125   return isinstance(val, basestring)
 126
 127
 128 def _TTrue(val):
 129   """Checks if a given value evaluates to a boolean True value.
 130
 131   """
 132   return bool(val)
 133
 134
 135 def _TElemOf(target_list):
 136   """Builds a function that checks if a given value is a member of a list.
 137
 138   """
 139   return lambda val: val in target_list
 140
 141
 142 # Container types
 143 def _TList(val):
 144   """Checks if the given value is a list.
 145
 146   """
 147   return isinstance(val, list)
 148
 149
 150 def _TDict(val):
 151   """Checks if the given value is a dictionary.
 152
 153   """
 154   return isinstance(val, dict)
 155
 156
 157 def _TIsLength(size):
 158   """Check is the given container is of the given size.
 159
 160   """
 161   return lambda container: len(container) == size
 162
 163
 164 # Combinator types
 165 def _TAnd(*args):
 166   """Combine multiple functions using an AND operation.
 167
 168   """
 169   def fn(val):
 170     return compat.all(t(val) for t in args)
 171   return fn
 172
 173
 174 def _TOr(*args):
 175   """Combine multiple functions using an AND operation.
 176
 177   """
 178   def fn(val):
 179     return compat.any(t(val) for t in args)
 180   return fn
 181
 182
 183 def _TMap(fn, test):
 184   """Checks that a modified version of the argument passes the given test.
 185
 186   """
 187   return lambda val: test(fn(val))
 188
 189
 190 # Type aliases
 191
 192 #: a non-empty string
 193 _TNonEmptyString = _TAnd(_TString, _TTrue)
 194
 195
 196 #: a maybe non-empty string
 197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
 198
 199
 200 #: a maybe boolean (bool or none)
 201 _TMaybeBool = _TOr(_TBool, _TNone)
 202
 203
 204 #: a positive integer
 205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
 206
 207 #: a strictly positive integer
 208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
 209
 210
 211 def _TListOf(my_type):
 212   """Checks if a given value is a list with all elements of the same type.
 213
 214   """
 215   return _TAnd(_TList,
 216                lambda lst: compat.all(my_type(v) for v in lst))
 217
 218
 219 def _TDictOf(key_type, val_type):
 220   """Checks a dict type for the type of its key/values.
 221
 222   """
 223   return _TAnd(_TDict,
 224                lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
 225                                 and compat.all(val_type(v)
 226                                                for v in my_dict.values())))
 227
 228
 229 # Common opcode attributes
 230
 231 #: output fields for a query operation
 232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
 233
 234
 235 #: the shutdown timeout
 236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
 237                      _TPositiveInt)
 238
 239 #: the force parameter
 240 _PForce = ("force", False, _TBool)
 241
 242 #: a required instance name (for single-instance LUs)
 243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
 244
 245
 246 #: a required node name (for single-node LUs)
 247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
 248
 249 #: the migration type (live/non-live)
 250 _PMigrationMode = ("mode", None, _TOr(_TNone,
 251                                       _TElemOf(constants.HT_MIGRATION_MODES)))
 252
 253 #: the obsolete 'live' mode (boolean)
 254 _PMigrationLive = ("live", None, _TMaybeBool)
 255
 256
 257 # End types
 258 class LogicalUnit(object):
 259   """Logical Unit base class.
 260
 261   Subclasses must follow these rules:
 262     - implement ExpandNames
 263     - implement CheckPrereq (except when tasklets are used)
 264     - implement Exec (except when tasklets are used)
 265     - implement BuildHooksEnv
 266     - redefine HPATH and HTYPE
 267     - optionally redefine their run requirements:
 268         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 269
 270   Note that all commands require root permissions.
 271
 272   @ivar dry_run_result: the value (if any) that will be returned to the caller
 273       in dry-run mode (signalled by opcode dry_run parameter)
 274   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 275       they should get if not already defined, and types they must match
 276
 277   """
 278   HPATH = None
 279   HTYPE = None
 280   _OP_PARAMS = []
 281   REQ_BGL = True
 282
 283   def __init__(self, processor, op, context, rpc):
 284     """Constructor for LogicalUnit.
 285
 286     This needs to be overridden in derived classes in order to check op
 287     validity.
 288
 289     """
 290     self.proc = processor
 291     self.op = op
 292     self.cfg = context.cfg
 293     self.context = context
 294     self.rpc = rpc
 295     # Dicts used to declare locking needs to mcpu
 296     self.needed_locks = None
 297     self.acquired_locks = {}
 298     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 299     self.add_locks = {}
 300     self.remove_locks = {}
 301     # Used to force good behavior when calling helper functions
 302     self.recalculate_locks = {}
 303     self.__ssh = None
 304     # logging
 305     self.Log = processor.Log # pylint: disable-msg=C0103
 306     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 307     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 308     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 309     # support for dry-run
 310     self.dry_run_result = None
 311     # support for generic debug attribute
 312     if (not hasattr(self.op, "debug_level") or
 313         not isinstance(self.op.debug_level, int)):
 314       self.op.debug_level = 0
 315
 316     # Tasklets
 317     self.tasklets = None
 318
 319     # The new kind-of-type-system
 320     op_id = self.op.OP_ID
 321     for attr_name, aval, test in self._OP_PARAMS:
 322       if not hasattr(op, attr_name):
 323         if aval == _NoDefault:
 324           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 325                                      (op_id, attr_name), errors.ECODE_INVAL)
 326         else:
 327           if callable(aval):
 328             dval = aval()
 329           else:
 330             dval = aval
 331           setattr(self.op, attr_name, dval)
 332       attr_val = getattr(op, attr_name)
 333       if test == _NoType:
 334         # no tests here
 335         continue
 336       if not callable(test):
 337         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 338                                      " given type is not a proper type (%s)" %
 339                                      (op_id, attr_name, test))
 340       if not test(attr_val):
 341         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 342                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 343         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 344                                    (op_id, attr_name), errors.ECODE_INVAL)
 345
 346     self.CheckArguments()
 347
 348   def __GetSSH(self):
 349     """Returns the SshRunner object
 350
 351     """
 352     if not self.__ssh:
 353       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 354     return self.__ssh
 355
 356   ssh = property(fget=__GetSSH)
 357
 358   def CheckArguments(self):
 359     """Check syntactic validity for the opcode arguments.
 360
 361     This method is for doing a simple syntactic check and ensure
 362     validity of opcode parameters, without any cluster-related
 363     checks. While the same can be accomplished in ExpandNames and/or
 364     CheckPrereq, doing these separate is better because:
 365
 366       - ExpandNames is left as as purely a lock-related function
 367       - CheckPrereq is run after we have acquired locks (and possible
 368         waited for them)
 369
 370     The function is allowed to change the self.op attribute so that
 371     later methods can no longer worry about missing parameters.
 372
 373     """
 374     pass
 375
 376   def ExpandNames(self):
 377     """Expand names for this LU.
 378
 379     This method is called before starting to execute the opcode, and it should
 380     update all the parameters of the opcode to their canonical form (e.g. a
 381     short node name must be fully expanded after this method has successfully
 382     completed). This way locking, hooks, logging, ecc. can work correctly.
 383
 384     LUs which implement this method must also populate the self.needed_locks
 385     member, as a dict with lock levels as keys, and a list of needed lock names
 386     as values. Rules:
 387
 388       - use an empty dict if you don't need any lock
 389       - if you don't need any lock at a particular level omit that level
 390       - don't put anything for the BGL level
 391       - if you want all locks at a level use locking.ALL_SET as a value
 392
 393     If you need to share locks (rather than acquire them exclusively) at one
 394     level you can modify self.share_locks, setting a true value (usually 1) for
 395     that level. By default locks are not shared.
 396
 397     This function can also define a list of tasklets, which then will be
 398     executed in order instead of the usual LU-level CheckPrereq and Exec
 399     functions, if those are not defined by the LU.
 400
 401     Examples::
 402
 403       # Acquire all nodes and one instance
 404       self.needed_locks = {
 405         locking.LEVEL_NODE: locking.ALL_SET,
 406         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 407       }
 408       # Acquire just two nodes
 409       self.needed_locks = {
 410         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 411       }
 412       # Acquire no locks
 413       self.needed_locks = {} # No, you can't leave it to the default value None
 414
 415     """
 416     # The implementation of this method is mandatory only if the new LU is
 417     # concurrent, so that old LUs don't need to be changed all at the same
 418     # time.
 419     if self.REQ_BGL:
 420       self.needed_locks = {} # Exclusive LUs don't need locks.
 421     else:
 422       raise NotImplementedError
 423
 424   def DeclareLocks(self, level):
 425     """Declare LU locking needs for a level
 426
 427     While most LUs can just declare their locking needs at ExpandNames time,
 428     sometimes there's the need to calculate some locks after having acquired
 429     the ones before. This function is called just before acquiring locks at a
 430     particular level, but after acquiring the ones at lower levels, and permits
 431     such calculations. It can be used to modify self.needed_locks, and by
 432     default it does nothing.
 433
 434     This function is only called if you have something already set in
 435     self.needed_locks for the level.
 436
 437     @param level: Locking level which is going to be locked
 438     @type level: member of ganeti.locking.LEVELS
 439
 440     """
 441
 442   def CheckPrereq(self):
 443     """Check prerequisites for this LU.
 444
 445     This method should check that the prerequisites for the execution
 446     of this LU are fulfilled. It can do internode communication, but
 447     it should be idempotent - no cluster or system changes are
 448     allowed.
 449
 450     The method should raise errors.OpPrereqError in case something is
 451     not fulfilled. Its return value is ignored.
 452
 453     This method should also update all the parameters of the opcode to
 454     their canonical form if it hasn't been done by ExpandNames before.
 455
 456     """
 457     if self.tasklets is not None:
 458       for (idx, tl) in enumerate(self.tasklets):
 459         logging.debug("Checking prerequisites for tasklet %s/%s",
 460                       idx + 1, len(self.tasklets))
 461         tl.CheckPrereq()
 462     else:
 463       pass
 464
 465   def Exec(self, feedback_fn):
 466     """Execute the LU.
 467
 468     This method should implement the actual work. It should raise
 469     errors.OpExecError for failures that are somewhat dealt with in
 470     code, or expected.
 471
 472     """
 473     if self.tasklets is not None:
 474       for (idx, tl) in enumerate(self.tasklets):
 475         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 476         tl.Exec(feedback_fn)
 477     else:
 478       raise NotImplementedError
 479
 480   def BuildHooksEnv(self):
 481     """Build hooks environment for this LU.
 482
 483     This method should return a three-node tuple consisting of: a dict
 484     containing the environment that will be used for running the
 485     specific hook for this LU, a list of node names on which the hook
 486     should run before the execution, and a list of node names on which
 487     the hook should run after the execution.
 488
 489     The keys of the dict must not have 'GANETI_' prefixed as this will
 490     be handled in the hooks runner. Also note additional keys will be
 491     added by the hooks runner. If the LU doesn't define any
 492     environment, an empty dict (and not None) should be returned.
 493
 494     No nodes should be returned as an empty list (and not None).
 495
 496     Note that if the HPATH for a LU class is None, this function will
 497     not be called.
 498
 499     """
 500     raise NotImplementedError
 501
 502   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 503     """Notify the LU about the results of its hooks.
 504
 505     This method is called every time a hooks phase is executed, and notifies
 506     the Logical Unit about the hooks' result. The LU can then use it to alter
 507     its result based on the hooks.  By default the method does nothing and the
 508     previous result is passed back unchanged but any LU can define it if it
 509     wants to use the local cluster hook-scripts somehow.
 510
 511     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 512         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 513     @param hook_results: the results of the multi-node hooks rpc call
 514     @param feedback_fn: function used send feedback back to the caller
 515     @param lu_result: the previous Exec result this LU had, or None
 516         in the PRE phase
 517     @return: the new Exec result, based on the previous result
 518         and hook results
 519
 520     """
 521     # API must be kept, thus we ignore the unused argument and could
 522     # be a function warnings
 523     # pylint: disable-msg=W0613,R0201
 524     return lu_result
 525
 526   def _ExpandAndLockInstance(self):
 527     """Helper function to expand and lock an instance.
 528
 529     Many LUs that work on an instance take its name in self.op.instance_name
 530     and need to expand it and then declare the expanded name for locking. This
 531     function does it, and then updates self.op.instance_name to the expanded
 532     name. It also initializes needed_locks as a dict, if this hasn't been done
 533     before.
 534
 535     """
 536     if self.needed_locks is None:
 537       self.needed_locks = {}
 538     else:
 539       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 540         "_ExpandAndLockInstance called with instance-level locks set"
 541     self.op.instance_name = _ExpandInstanceName(self.cfg,
 542                                                 self.op.instance_name)
 543     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 544
 545   def _LockInstancesNodes(self, primary_only=False):
 546     """Helper function to declare instances' nodes for locking.
 547
 548     This function should be called after locking one or more instances to lock
 549     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 550     with all primary or secondary nodes for instances already locked and
 551     present in self.needed_locks[locking.LEVEL_INSTANCE].
 552
 553     It should be called from DeclareLocks, and for safety only works if
 554     self.recalculate_locks[locking.LEVEL_NODE] is set.
 555
 556     In the future it may grow parameters to just lock some instance's nodes, or
 557     to just lock primaries or secondary nodes, if needed.
 558
 559     If should be called in DeclareLocks in a way similar to::
 560
 561       if level == locking.LEVEL_NODE:
 562         self._LockInstancesNodes()
 563
 564     @type primary_only: boolean
 565     @param primary_only: only lock primary nodes of locked instances
 566
 567     """
 568     assert locking.LEVEL_NODE in self.recalculate_locks, \
 569       "_LockInstancesNodes helper function called with no nodes to recalculate"
 570
 571     # TODO: check if we're really been called with the instance locks held
 572
 573     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 574     # future we might want to have different behaviors depending on the value
 575     # of self.recalculate_locks[locking.LEVEL_NODE]
 576     wanted_nodes = []
 577     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 578       instance = self.context.cfg.GetInstanceInfo(instance_name)
 579       wanted_nodes.append(instance.primary_node)
 580       if not primary_only:
 581         wanted_nodes.extend(instance.secondary_nodes)
 582
 583     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 584       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 585     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 586       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 587
 588     del self.recalculate_locks[locking.LEVEL_NODE]
 589
 590
 591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 592   """Simple LU which runs no hooks.
 593
 594   This LU is intended as a parent for other LogicalUnits which will
 595   run no hooks, in order to reduce duplicate code.
 596
 597   """
 598   HPATH = None
 599   HTYPE = None
 600
 601   def BuildHooksEnv(self):
 602     """Empty BuildHooksEnv for NoHooksLu.
 603
 604     This just raises an error.
 605
 606     """
 607     assert False, "BuildHooksEnv called for NoHooksLUs"
 608
 609
 610 class Tasklet:
 611   """Tasklet base class.
 612
 613   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 614   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 615   tasklets know nothing about locks.
 616
 617   Subclasses must follow these rules:
 618     - Implement CheckPrereq
 619     - Implement Exec
 620
 621   """
 622   def __init__(self, lu):
 623     self.lu = lu
 624
 625     # Shortcuts
 626     self.cfg = lu.cfg
 627     self.rpc = lu.rpc
 628
 629   def CheckPrereq(self):
 630     """Check prerequisites for this tasklets.
 631
 632     This method should check whether the prerequisites for the execution of
 633     this tasklet are fulfilled. It can do internode communication, but it
 634     should be idempotent - no cluster or system changes are allowed.
 635
 636     The method should raise errors.OpPrereqError in case something is not
 637     fulfilled. Its return value is ignored.
 638
 639     This method should also update all parameters to their canonical form if it
 640     hasn't been done before.
 641
 642     """
 643     pass
 644
 645   def Exec(self, feedback_fn):
 646     """Execute the tasklet.
 647
 648     This method should implement the actual work. It should raise
 649     errors.OpExecError for failures that are somewhat dealt with in code, or
 650     expected.
 651
 652     """
 653     raise NotImplementedError
 654
 655
 656 def _GetWantedNodes(lu, nodes):
 657   """Returns list of checked and expanded node names.
 658
 659   @type lu: L{LogicalUnit}
 660   @param lu: the logical unit on whose behalf we execute
 661   @type nodes: list
 662   @param nodes: list of node names or None for all nodes
 663   @rtype: list
 664   @return: the list of nodes, sorted
 665   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 666
 667   """
 668   if not nodes:
 669     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 670       " non-empty list of nodes whose name is to be expanded.")
 671
 672   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 673   return utils.NiceSort(wanted)
 674
 675
 676 def _GetWantedInstances(lu, instances):
 677   """Returns list of checked and expanded instance names.
 678
 679   @type lu: L{LogicalUnit}
 680   @param lu: the logical unit on whose behalf we execute
 681   @type instances: list
 682   @param instances: list of instance names or None for all instances
 683   @rtype: list
 684   @return: the list of instances, sorted
 685   @raise errors.OpPrereqError: if the instances parameter is wrong type
 686   @raise errors.OpPrereqError: if any of the passed instances is not found
 687
 688   """
 689   if instances:
 690     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 691   else:
 692     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 693   return wanted
 694
 695
 696 def _GetUpdatedParams(old_params, update_dict,
 697                       use_default=True, use_none=False):
 698   """Return the new version of a parameter dictionary.
 699
 700   @type old_params: dict
 701   @param old_params: old parameters
 702   @type update_dict: dict
 703   @param update_dict: dict containing new parameter values, or
 704       constants.VALUE_DEFAULT to reset the parameter to its default
 705       value
 706   @param use_default: boolean
 707   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 708       values as 'to be deleted' values
 709   @param use_none: boolean
 710   @type use_none: whether to recognise C{None} values as 'to be
 711       deleted' values
 712   @rtype: dict
 713   @return: the new parameter dictionary
 714
 715   """
 716   params_copy = copy.deepcopy(old_params)
 717   for key, val in update_dict.iteritems():
 718     if ((use_default and val == constants.VALUE_DEFAULT) or
 719         (use_none and val is None)):
 720       try:
 721         del params_copy[key]
 722       except KeyError:
 723         pass
 724     else:
 725       params_copy[key] = val
 726   return params_copy
 727
 728
 729 def _CheckOutputFields(static, dynamic, selected):
 730   """Checks whether all selected fields are valid.
 731
 732   @type static: L{utils.FieldSet}
 733   @param static: static fields set
 734   @type dynamic: L{utils.FieldSet}
 735   @param dynamic: dynamic fields set
 736
 737   """
 738   f = utils.FieldSet()
 739   f.Extend(static)
 740   f.Extend(dynamic)
 741
 742   delta = f.NonMatching(selected)
 743   if delta:
 744     raise errors.OpPrereqError("Unknown output fields selected: %s"
 745                                % ",".join(delta), errors.ECODE_INVAL)
 746
 747
 748 def _CheckGlobalHvParams(params):
 749   """Validates that given hypervisor params are not global ones.
 750
 751   This will ensure that instances don't get customised versions of
 752   global params.
 753
 754   """
 755   used_globals = constants.HVC_GLOBALS.intersection(params)
 756   if used_globals:
 757     msg = ("The following hypervisor parameters are global and cannot"
 758            " be customized at instance level, please modify them at"
 759            " cluster level: %s" % utils.CommaJoin(used_globals))
 760     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 761
 762
 763 def _CheckNodeOnline(lu, node):
 764   """Ensure that a given node is online.
 765
 766   @param lu: the LU on behalf of which we make the check
 767   @param node: the node to check
 768   @raise errors.OpPrereqError: if the node is offline
 769
 770   """
 771   if lu.cfg.GetNodeInfo(node).offline:
 772     raise errors.OpPrereqError("Can't use offline node %s" % node,
 773                                errors.ECODE_INVAL)
 774
 775
 776 def _CheckNodeNotDrained(lu, node):
 777   """Ensure that a given node is not drained.
 778
 779   @param lu: the LU on behalf of which we make the check
 780   @param node: the node to check
 781   @raise errors.OpPrereqError: if the node is drained
 782
 783   """
 784   if lu.cfg.GetNodeInfo(node).drained:
 785     raise errors.OpPrereqError("Can't use drained node %s" % node,
 786                                errors.ECODE_INVAL)
 787
 788
 789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 790   """Ensure that a node supports a given OS.
 791
 792   @param lu: the LU on behalf of which we make the check
 793   @param node: the node to check
 794   @param os_name: the OS to query about
 795   @param force_variant: whether to ignore variant errors
 796   @raise errors.OpPrereqError: if the node is not supporting the OS
 797
 798   """
 799   result = lu.rpc.call_os_get(node, os_name)
 800   result.Raise("OS '%s' not in supported OS list for node %s" %
 801                (os_name, node),
 802                prereq=True, ecode=errors.ECODE_INVAL)
 803   if not force_variant:
 804     _CheckOSVariant(result.payload, os_name)
 805
 806
 807 def _RequireFileStorage():
 808   """Checks that file storage is enabled.
 809
 810   @raise errors.OpPrereqError: when file storage is disabled
 811
 812   """
 813   if not constants.ENABLE_FILE_STORAGE:
 814     raise errors.OpPrereqError("File storage disabled at configure time",
 815                                errors.ECODE_INVAL)
 816
 817
 818 def _CheckDiskTemplate(template):
 819   """Ensure a given disk template is valid.
 820
 821   """
 822   if template not in constants.DISK_TEMPLATES:
 823     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 824            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 825     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 826   if template == constants.DT_FILE:
 827     _RequireFileStorage()
 828   return True
 829
 830
 831 def _CheckStorageType(storage_type):
 832   """Ensure a given storage type is valid.
 833
 834   """
 835   if storage_type not in constants.VALID_STORAGE_TYPES:
 836     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 837                                errors.ECODE_INVAL)
 838   if storage_type == constants.ST_FILE:
 839     _RequireFileStorage()
 840   return True
 841
 842
 843 def _GetClusterDomainSecret():
 844   """Reads the cluster domain secret.
 845
 846   """
 847   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 848                                strict=True)
 849
 850
 851 def _CheckInstanceDown(lu, instance, reason):
 852   """Ensure that an instance is not running."""
 853   if instance.admin_up:
 854     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 855                                (instance.name, reason), errors.ECODE_STATE)
 856
 857   pnode = instance.primary_node
 858   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 859   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 860               prereq=True, ecode=errors.ECODE_ENVIRON)
 861
 862   if instance.name in ins_l.payload:
 863     raise errors.OpPrereqError("Instance %s is running, %s" %
 864                                (instance.name, reason), errors.ECODE_STATE)
 865
 866
 867 def _ExpandItemName(fn, name, kind):
 868   """Expand an item name.
 869
 870   @param fn: the function to use for expansion
 871   @param name: requested item name
 872   @param kind: text description ('Node' or 'Instance')
 873   @return: the resolved (full) name
 874   @raise errors.OpPrereqError: if the item is not found
 875
 876   """
 877   full_name = fn(name)
 878   if full_name is None:
 879     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 880                                errors.ECODE_NOENT)
 881   return full_name
 882
 883
 884 def _ExpandNodeName(cfg, name):
 885   """Wrapper over L{_ExpandItemName} for nodes."""
 886   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 887
 888
 889 def _ExpandInstanceName(cfg, name):
 890   """Wrapper over L{_ExpandItemName} for instance."""
 891   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 892
 893
 894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 895                           memory, vcpus, nics, disk_template, disks,
 896                           bep, hvp, hypervisor_name):
 897   """Builds instance related env variables for hooks
 898
 899   This builds the hook environment from individual variables.
 900
 901   @type name: string
 902   @param name: the name of the instance
 903   @type primary_node: string
 904   @param primary_node: the name of the instance's primary node
 905   @type secondary_nodes: list
 906   @param secondary_nodes: list of secondary nodes as strings
 907   @type os_type: string
 908   @param os_type: the name of the instance's OS
 909   @type status: boolean
 910   @param status: the should_run status of the instance
 911   @type memory: string
 912   @param memory: the memory size of the instance
 913   @type vcpus: string
 914   @param vcpus: the count of VCPUs the instance has
 915   @type nics: list
 916   @param nics: list of tuples (ip, mac, mode, link) representing
 917       the NICs the instance has
 918   @type disk_template: string
 919   @param disk_template: the disk template of the instance
 920   @type disks: list
 921   @param disks: the list of (size, mode) pairs
 922   @type bep: dict
 923   @param bep: the backend parameters for the instance
 924   @type hvp: dict
 925   @param hvp: the hypervisor parameters for the instance
 926   @type hypervisor_name: string
 927   @param hypervisor_name: the hypervisor for the instance
 928   @rtype: dict
 929   @return: the hook environment for this instance
 930
 931   """
 932   if status:
 933     str_status = "up"
 934   else:
 935     str_status = "down"
 936   env = {
 937     "OP_TARGET": name,
 938     "INSTANCE_NAME": name,
 939     "INSTANCE_PRIMARY": primary_node,
 940     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 941     "INSTANCE_OS_TYPE": os_type,
 942     "INSTANCE_STATUS": str_status,
 943     "INSTANCE_MEMORY": memory,
 944     "INSTANCE_VCPUS": vcpus,
 945     "INSTANCE_DISK_TEMPLATE": disk_template,
 946     "INSTANCE_HYPERVISOR": hypervisor_name,
 947   }
 948
 949   if nics:
 950     nic_count = len(nics)
 951     for idx, (ip, mac, mode, link) in enumerate(nics):
 952       if ip is None:
 953         ip = ""
 954       env["INSTANCE_NIC%d_IP" % idx] = ip
 955       env["INSTANCE_NIC%d_MAC" % idx] = mac
 956       env["INSTANCE_NIC%d_MODE" % idx] = mode
 957       env["INSTANCE_NIC%d_LINK" % idx] = link
 958       if mode == constants.NIC_MODE_BRIDGED:
 959         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 960   else:
 961     nic_count = 0
 962
 963   env["INSTANCE_NIC_COUNT"] = nic_count
 964
 965   if disks:
 966     disk_count = len(disks)
 967     for idx, (size, mode) in enumerate(disks):
 968       env["INSTANCE_DISK%d_SIZE" % idx] = size
 969       env["INSTANCE_DISK%d_MODE" % idx] = mode
 970   else:
 971     disk_count = 0
 972
 973   env["INSTANCE_DISK_COUNT"] = disk_count
 974
 975   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 976     for key, value in source.items():
 977       env["INSTANCE_%s_%s" % (kind, key)] = value
 978
 979   return env
 980
 981
 982 def _NICListToTuple(lu, nics):
 983   """Build a list of nic information tuples.
 984
 985   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 986   value in LUQueryInstanceData.
 987
 988   @type lu:  L{LogicalUnit}
 989   @param lu: the logical unit on whose behalf we execute
 990   @type nics: list of L{objects.NIC}
 991   @param nics: list of nics to convert to hooks tuples
 992
 993   """
 994   hooks_nics = []
 995   cluster = lu.cfg.GetClusterInfo()
 996   for nic in nics:
 997     ip = nic.ip
 998     mac = nic.mac
 999     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000     mode = filled_params[constants.NIC_MODE]
1001     link = filled_params[constants.NIC_LINK]
1002     hooks_nics.append((ip, mac, mode, link))
1003   return hooks_nics
1004
1005
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007   """Builds instance related env variables for hooks from an object.
1008
1009   @type lu: L{LogicalUnit}
1010   @param lu: the logical unit on whose behalf we execute
1011   @type instance: L{objects.Instance}
1012   @param instance: the instance for which we should build the
1013       environment
1014   @type override: dict
1015   @param override: dictionary with key/values that will override
1016       our values
1017   @rtype: dict
1018   @return: the hook environment dictionary
1019
1020   """
1021   cluster = lu.cfg.GetClusterInfo()
1022   bep = cluster.FillBE(instance)
1023   hvp = cluster.FillHV(instance)
1024   args = {
1025     'name': instance.name,
1026     'primary_node': instance.primary_node,
1027     'secondary_nodes': instance.secondary_nodes,
1028     'os_type': instance.os,
1029     'status': instance.admin_up,
1030     'memory': bep[constants.BE_MEMORY],
1031     'vcpus': bep[constants.BE_VCPUS],
1032     'nics': _NICListToTuple(lu, instance.nics),
1033     'disk_template': instance.disk_template,
1034     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1035     'bep': bep,
1036     'hvp': hvp,
1037     'hypervisor_name': instance.hypervisor,
1038   }
1039   if override:
1040     args.update(override)
1041   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1042
1043
1044 def _AdjustCandidatePool(lu, exceptions):
1045   """Adjust the candidate pool after node operations.
1046
1047   """
1048   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1049   if mod_list:
1050     lu.LogInfo("Promoted nodes to master candidate role: %s",
1051                utils.CommaJoin(node.name for node in mod_list))
1052     for name in mod_list:
1053       lu.context.ReaddNode(name)
1054   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1055   if mc_now > mc_max:
1056     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1057                (mc_now, mc_max))
1058
1059
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061   """Decide whether I should promote myself as a master candidate.
1062
1063   """
1064   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066   # the new node will increase mc_max with one, so:
1067   mc_should = min(mc_should + 1, cp_size)
1068   return mc_now < mc_should
1069
1070
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072   """Check that the brigdes needed by a list of nics exist.
1073
1074   """
1075   cluster = lu.cfg.GetClusterInfo()
1076   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077   brlist = [params[constants.NIC_LINK] for params in paramslist
1078             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1079   if brlist:
1080     result = lu.rpc.call_bridges_exist(target_node, brlist)
1081     result.Raise("Error checking bridges on destination node '%s'" %
1082                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1083
1084
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086   """Check that the brigdes needed by an instance exist.
1087
1088   """
1089   if node is None:
1090     node = instance.primary_node
1091   _CheckNicsBridgesExist(lu, instance.nics, node)
1092
1093
1094 def _CheckOSVariant(os_obj, name):
1095   """Check whether an OS name conforms to the os variants specification.
1096
1097   @type os_obj: L{objects.OS}
1098   @param os_obj: OS object to check
1099   @type name: string
1100   @param name: OS name passed by the user, to check for validity
1101
1102   """
1103   if not os_obj.supported_variants:
1104     return
1105   variant = objects.OS.GetVariant(name)
1106   if not variant:
1107     raise errors.OpPrereqError("OS name must include a variant",
1108                                errors.ECODE_INVAL)
1109
1110   if variant not in os_obj.supported_variants:
1111     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1112
1113
1114 def _GetNodeInstancesInner(cfg, fn):
1115   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1116
1117
1118 def _GetNodeInstances(cfg, node_name):
1119   """Returns a list of all primary and secondary instances on a node.
1120
1121   """
1122
1123   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1124
1125
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127   """Returns primary instances on a node.
1128
1129   """
1130   return _GetNodeInstancesInner(cfg,
1131                                 lambda inst: node_name == inst.primary_node)
1132
1133
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135   """Returns secondary instances on a node.
1136
1137   """
1138   return _GetNodeInstancesInner(cfg,
1139                                 lambda inst: node_name in inst.secondary_nodes)
1140
1141
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143   """Returns the arguments for a storage type.
1144
1145   """
1146   # Special case for file storage
1147   if storage_type == constants.ST_FILE:
1148     # storage.FileStorage wants a list of storage directories
1149     return [[cfg.GetFileStorageDir()]]
1150
1151   return []
1152
1153
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1155   faulty = []
1156
1157   for dev in instance.disks:
1158     cfg.SetDiskID(dev, node_name)
1159
1160   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161   result.Raise("Failed to get disk status from node %s" % node_name,
1162                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1163
1164   for idx, bdev_status in enumerate(result.payload):
1165     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1166       faulty.append(idx)
1167
1168   return faulty
1169
1170
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172   """Check the sanity of iallocator and node arguments and use the
1173   cluster-wide iallocator if appropriate.
1174
1175   Check that at most one of (iallocator, node) is specified. If none is
1176   specified, then the LU's opcode's iallocator slot is filled with the
1177   cluster-wide default iallocator.
1178
1179   @type iallocator_slot: string
1180   @param iallocator_slot: the name of the opcode iallocator slot
1181   @type node_slot: string
1182   @param node_slot: the name of the opcode target node slot
1183
1184   """
1185   node = getattr(lu.op, node_slot, None)
1186   iallocator = getattr(lu.op, iallocator_slot, None)
1187
1188   if node is not None and iallocator is not None:
1189     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1190                                errors.ECODE_INVAL)
1191   elif node is None and iallocator is None:
1192     default_iallocator = lu.cfg.GetDefaultIAllocator()
1193     if default_iallocator:
1194       setattr(lu.op, iallocator_slot, default_iallocator)
1195     else:
1196       raise errors.OpPrereqError("No iallocator or node given and no"
1197                                  " cluster-wide default iallocator found."
1198                                  " Please specify either an iallocator or a"
1199                                  " node, or set a cluster-wide default"
1200                                  " iallocator.")
1201
1202
1203 class LUPostInitCluster(LogicalUnit):
1204   """Logical unit for running hooks after cluster initialization.
1205
1206   """
1207   HPATH = "cluster-init"
1208   HTYPE = constants.HTYPE_CLUSTER
1209
1210   def BuildHooksEnv(self):
1211     """Build hooks env.
1212
1213     """
1214     env = {"OP_TARGET": self.cfg.GetClusterName()}
1215     mn = self.cfg.GetMasterNode()
1216     return env, [], [mn]
1217
1218   def Exec(self, feedback_fn):
1219     """Nothing to do.
1220
1221     """
1222     return True
1223
1224
1225 class LUDestroyCluster(LogicalUnit):
1226   """Logical unit for destroying the cluster.
1227
1228   """
1229   HPATH = "cluster-destroy"
1230   HTYPE = constants.HTYPE_CLUSTER
1231
1232   def BuildHooksEnv(self):
1233     """Build hooks env.
1234
1235     """
1236     env = {"OP_TARGET": self.cfg.GetClusterName()}
1237     return env, [], []
1238
1239   def CheckPrereq(self):
1240     """Check prerequisites.
1241
1242     This checks whether the cluster is empty.
1243
1244     Any errors are signaled by raising errors.OpPrereqError.
1245
1246     """
1247     master = self.cfg.GetMasterNode()
1248
1249     nodelist = self.cfg.GetNodeList()
1250     if len(nodelist) != 1 or nodelist[0] != master:
1251       raise errors.OpPrereqError("There are still %d node(s) in"
1252                                  " this cluster." % (len(nodelist) - 1),
1253                                  errors.ECODE_INVAL)
1254     instancelist = self.cfg.GetInstanceList()
1255     if instancelist:
1256       raise errors.OpPrereqError("There are still %d instance(s) in"
1257                                  " this cluster." % len(instancelist),
1258                                  errors.ECODE_INVAL)
1259
1260   def Exec(self, feedback_fn):
1261     """Destroys the cluster.
1262
1263     """
1264     master = self.cfg.GetMasterNode()
1265
1266     # Run post hooks on master node before it's removed
1267     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1268     try:
1269       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1270     except:
1271       # pylint: disable-msg=W0702
1272       self.LogWarning("Errors occurred running hooks on %s" % master)
1273
1274     result = self.rpc.call_node_stop_master(master, False)
1275     result.Raise("Could not disable the master role")
1276
1277     return master
1278
1279
1280 def _VerifyCertificate(filename):
1281   """Verifies a certificate for LUVerifyCluster.
1282
1283   @type filename: string
1284   @param filename: Path to PEM file
1285
1286   """
1287   try:
1288     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1289                                            utils.ReadFile(filename))
1290   except Exception, err: # pylint: disable-msg=W0703
1291     return (LUVerifyCluster.ETYPE_ERROR,
1292             "Failed to load X509 certificate %s: %s" % (filename, err))
1293
1294   (errcode, msg) = \
1295     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1296                                 constants.SSL_CERT_EXPIRATION_ERROR)
1297
1298   if msg:
1299     fnamemsg = "While verifying %s: %s" % (filename, msg)
1300   else:
1301     fnamemsg = None
1302
1303   if errcode is None:
1304     return (None, fnamemsg)
1305   elif errcode == utils.CERT_WARNING:
1306     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1307   elif errcode == utils.CERT_ERROR:
1308     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1309
1310   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1311
1312
1313 class LUVerifyCluster(LogicalUnit):
1314   """Verifies the cluster status.
1315
1316   """
1317   HPATH = "cluster-verify"
1318   HTYPE = constants.HTYPE_CLUSTER
1319   _OP_PARAMS = [
1320     ("skip_checks", _EmptyList,
1321      _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1322     ("verbose", False, _TBool),
1323     ("error_codes", False, _TBool),
1324     ("debug_simulate_errors", False, _TBool),
1325     ]
1326   REQ_BGL = False
1327
1328   TCLUSTER = "cluster"
1329   TNODE = "node"
1330   TINSTANCE = "instance"
1331
1332   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1333   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1334   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1335   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1336   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1337   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1338   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1339   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1340   ENODEDRBD = (TNODE, "ENODEDRBD")
1341   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1342   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1343   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1344   ENODEHV = (TNODE, "ENODEHV")
1345   ENODELVM = (TNODE, "ENODELVM")
1346   ENODEN1 = (TNODE, "ENODEN1")
1347   ENODENET = (TNODE, "ENODENET")
1348   ENODEOS = (TNODE, "ENODEOS")
1349   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1350   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1351   ENODERPC = (TNODE, "ENODERPC")
1352   ENODESSH = (TNODE, "ENODESSH")
1353   ENODEVERSION = (TNODE, "ENODEVERSION")
1354   ENODESETUP = (TNODE, "ENODESETUP")
1355   ENODETIME = (TNODE, "ENODETIME")
1356
1357   ETYPE_FIELD = "code"
1358   ETYPE_ERROR = "ERROR"
1359   ETYPE_WARNING = "WARNING"
1360
1361   class NodeImage(object):
1362     """A class representing the logical and physical status of a node.
1363
1364     @type name: string
1365     @ivar name: the node name to which this object refers
1366     @ivar volumes: a structure as returned from
1367         L{ganeti.backend.GetVolumeList} (runtime)
1368     @ivar instances: a list of running instances (runtime)
1369     @ivar pinst: list of configured primary instances (config)
1370     @ivar sinst: list of configured secondary instances (config)
1371     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1372         of this node (config)
1373     @ivar mfree: free memory, as reported by hypervisor (runtime)
1374     @ivar dfree: free disk, as reported by the node (runtime)
1375     @ivar offline: the offline status (config)
1376     @type rpc_fail: boolean
1377     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1378         not whether the individual keys were correct) (runtime)
1379     @type lvm_fail: boolean
1380     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1381     @type hyp_fail: boolean
1382     @ivar hyp_fail: whether the RPC call didn't return the instance list
1383     @type ghost: boolean
1384     @ivar ghost: whether this is a known node or not (config)
1385     @type os_fail: boolean
1386     @ivar os_fail: whether the RPC call didn't return valid OS data
1387     @type oslist: list
1388     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1389
1390     """
1391     def __init__(self, offline=False, name=None):
1392       self.name = name
1393       self.volumes = {}
1394       self.instances = []
1395       self.pinst = []
1396       self.sinst = []
1397       self.sbp = {}
1398       self.mfree = 0
1399       self.dfree = 0
1400       self.offline = offline
1401       self.rpc_fail = False
1402       self.lvm_fail = False
1403       self.hyp_fail = False
1404       self.ghost = False
1405       self.os_fail = False
1406       self.oslist = {}
1407
1408   def ExpandNames(self):
1409     self.needed_locks = {
1410       locking.LEVEL_NODE: locking.ALL_SET,
1411       locking.LEVEL_INSTANCE: locking.ALL_SET,
1412     }
1413     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1414
1415   def _Error(self, ecode, item, msg, *args, **kwargs):
1416     """Format an error message.
1417
1418     Based on the opcode's error_codes parameter, either format a
1419     parseable error code, or a simpler error string.
1420
1421     This must be called only from Exec and functions called from Exec.
1422
1423     """
1424     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1425     itype, etxt = ecode
1426     # first complete the msg
1427     if args:
1428       msg = msg % args
1429     # then format the whole message
1430     if self.op.error_codes:
1431       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1432     else:
1433       if item:
1434         item = " " + item
1435       else:
1436         item = ""
1437       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1438     # and finally report it via the feedback_fn
1439     self._feedback_fn("  - %s" % msg)
1440
1441   def _ErrorIf(self, cond, *args, **kwargs):
1442     """Log an error message if the passed condition is True.
1443
1444     """
1445     cond = bool(cond) or self.op.debug_simulate_errors
1446     if cond:
1447       self._Error(*args, **kwargs)
1448     # do not mark the operation as failed for WARN cases only
1449     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1450       self.bad = self.bad or cond
1451
1452   def _VerifyNode(self, ninfo, nresult):
1453     """Perform some basic validation on data returned from a node.
1454
1455       - check the result data structure is well formed and has all the
1456         mandatory fields
1457       - check ganeti version
1458
1459     @type ninfo: L{objects.Node}
1460     @param ninfo: the node to check
1461     @param nresult: the results from the node
1462     @rtype: boolean
1463     @return: whether overall this call was successful (and we can expect
1464          reasonable values in the respose)
1465
1466     """
1467     node = ninfo.name
1468     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1469
1470     # main result, nresult should be a non-empty dict
1471     test = not nresult or not isinstance(nresult, dict)
1472     _ErrorIf(test, self.ENODERPC, node,
1473                   "unable to verify node: no data returned")
1474     if test:
1475       return False
1476
1477     # compares ganeti version
1478     local_version = constants.PROTOCOL_VERSION
1479     remote_version = nresult.get("version", None)
1480     test = not (remote_version and
1481                 isinstance(remote_version, (list, tuple)) and
1482                 len(remote_version) == 2)
1483     _ErrorIf(test, self.ENODERPC, node,
1484              "connection to node returned invalid data")
1485     if test:
1486       return False
1487
1488     test = local_version != remote_version[0]
1489     _ErrorIf(test, self.ENODEVERSION, node,
1490              "incompatible protocol versions: master %s,"
1491              " node %s", local_version, remote_version[0])
1492     if test:
1493       return False
1494
1495     # node seems compatible, we can actually try to look into its results
1496
1497     # full package version
1498     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1499                   self.ENODEVERSION, node,
1500                   "software version mismatch: master %s, node %s",
1501                   constants.RELEASE_VERSION, remote_version[1],
1502                   code=self.ETYPE_WARNING)
1503
1504     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1505     if isinstance(hyp_result, dict):
1506       for hv_name, hv_result in hyp_result.iteritems():
1507         test = hv_result is not None
1508         _ErrorIf(test, self.ENODEHV, node,
1509                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1510
1511
1512     test = nresult.get(constants.NV_NODESETUP,
1513                            ["Missing NODESETUP results"])
1514     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1515              "; ".join(test))
1516
1517     return True
1518
1519   def _VerifyNodeTime(self, ninfo, nresult,
1520                       nvinfo_starttime, nvinfo_endtime):
1521     """Check the node time.
1522
1523     @type ninfo: L{objects.Node}
1524     @param ninfo: the node to check
1525     @param nresult: the remote results for the node
1526     @param nvinfo_starttime: the start time of the RPC call
1527     @param nvinfo_endtime: the end time of the RPC call
1528
1529     """
1530     node = ninfo.name
1531     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1532
1533     ntime = nresult.get(constants.NV_TIME, None)
1534     try:
1535       ntime_merged = utils.MergeTime(ntime)
1536     except (ValueError, TypeError):
1537       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1538       return
1539
1540     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1541       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1542     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1543       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1544     else:
1545       ntime_diff = None
1546
1547     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1548              "Node time diverges by at least %s from master node time",
1549              ntime_diff)
1550
1551   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1552     """Check the node time.
1553
1554     @type ninfo: L{objects.Node}
1555     @param ninfo: the node to check
1556     @param nresult: the remote results for the node
1557     @param vg_name: the configured VG name
1558
1559     """
1560     if vg_name is None:
1561       return
1562
1563     node = ninfo.name
1564     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1565
1566     # checks vg existence and size > 20G
1567     vglist = nresult.get(constants.NV_VGLIST, None)
1568     test = not vglist
1569     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1570     if not test:
1571       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1572                                             constants.MIN_VG_SIZE)
1573       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1574
1575     # check pv names
1576     pvlist = nresult.get(constants.NV_PVLIST, None)
1577     test = pvlist is None
1578     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1579     if not test:
1580       # check that ':' is not present in PV names, since it's a
1581       # special character for lvcreate (denotes the range of PEs to
1582       # use on the PV)
1583       for _, pvname, owner_vg in pvlist:
1584         test = ":" in pvname
1585         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1586                  " '%s' of VG '%s'", pvname, owner_vg)
1587
1588   def _VerifyNodeNetwork(self, ninfo, nresult):
1589     """Check the node time.
1590
1591     @type ninfo: L{objects.Node}
1592     @param ninfo: the node to check
1593     @param nresult: the remote results for the node
1594
1595     """
1596     node = ninfo.name
1597     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1598
1599     test = constants.NV_NODELIST not in nresult
1600     _ErrorIf(test, self.ENODESSH, node,
1601              "node hasn't returned node ssh connectivity data")
1602     if not test:
1603       if nresult[constants.NV_NODELIST]:
1604         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1605           _ErrorIf(True, self.ENODESSH, node,
1606                    "ssh communication with node '%s': %s", a_node, a_msg)
1607
1608     test = constants.NV_NODENETTEST not in nresult
1609     _ErrorIf(test, self.ENODENET, node,
1610              "node hasn't returned node tcp connectivity data")
1611     if not test:
1612       if nresult[constants.NV_NODENETTEST]:
1613         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1614         for anode in nlist:
1615           _ErrorIf(True, self.ENODENET, node,
1616                    "tcp communication with node '%s': %s",
1617                    anode, nresult[constants.NV_NODENETTEST][anode])
1618
1619     test = constants.NV_MASTERIP not in nresult
1620     _ErrorIf(test, self.ENODENET, node,
1621              "node hasn't returned node master IP reachability data")
1622     if not test:
1623       if not nresult[constants.NV_MASTERIP]:
1624         if node == self.master_node:
1625           msg = "the master node cannot reach the master IP (not configured?)"
1626         else:
1627           msg = "cannot reach the master IP"
1628         _ErrorIf(True, self.ENODENET, node, msg)
1629
1630
1631   def _VerifyInstance(self, instance, instanceconfig, node_image):
1632     """Verify an instance.
1633
1634     This function checks to see if the required block devices are
1635     available on the instance's node.
1636
1637     """
1638     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1639     node_current = instanceconfig.primary_node
1640
1641     node_vol_should = {}
1642     instanceconfig.MapLVsByNode(node_vol_should)
1643
1644     for node in node_vol_should:
1645       n_img = node_image[node]
1646       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1647         # ignore missing volumes on offline or broken nodes
1648         continue
1649       for volume in node_vol_should[node]:
1650         test = volume not in n_img.volumes
1651         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1652                  "volume %s missing on node %s", volume, node)
1653
1654     if instanceconfig.admin_up:
1655       pri_img = node_image[node_current]
1656       test = instance not in pri_img.instances and not pri_img.offline
1657       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1658                "instance not running on its primary node %s",
1659                node_current)
1660
1661     for node, n_img in node_image.items():
1662       if (not node == node_current):
1663         test = instance in n_img.instances
1664         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1665                  "instance should not run on node %s", node)
1666
1667   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668     """Verify if there are any unknown volumes in the cluster.
1669
1670     The .os, .swap and backup volumes are ignored. All other volumes are
1671     reported as unknown.
1672
1673     @type reserved: L{ganeti.utils.FieldSet}
1674     @param reserved: a FieldSet of reserved volume names
1675
1676     """
1677     for node, n_img in node_image.items():
1678       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1679         # skip non-healthy nodes
1680         continue
1681       for volume in n_img.volumes:
1682         test = ((node not in node_vol_should or
1683                 volume not in node_vol_should[node]) and
1684                 not reserved.Matches(volume))
1685         self._ErrorIf(test, self.ENODEORPHANLV, node,
1686                       "volume %s is unknown", volume)
1687
1688   def _VerifyOrphanInstances(self, instancelist, node_image):
1689     """Verify the list of running instances.
1690
1691     This checks what instances are running but unknown to the cluster.
1692
1693     """
1694     for node, n_img in node_image.items():
1695       for o_inst in n_img.instances:
1696         test = o_inst not in instancelist
1697         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698                       "instance %s on node %s should not exist", o_inst, node)
1699
1700   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701     """Verify N+1 Memory Resilience.
1702
1703     Check that if one single node dies we can still start all the
1704     instances it was primary for.
1705
1706     """
1707     for node, n_img in node_image.items():
1708       # This code checks that every node which is now listed as
1709       # secondary has enough memory to host all instances it is
1710       # supposed to should a single other node in the cluster fail.
1711       # FIXME: not ready for failover to an arbitrary node
1712       # FIXME: does not support file-backed instances
1713       # WARNING: we currently take into account down instances as well
1714       # as up ones, considering that even if they're down someone
1715       # might want to start them even in the event of a node failure.
1716       for prinode, instances in n_img.sbp.items():
1717         needed_mem = 0
1718         for instance in instances:
1719           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1720           if bep[constants.BE_AUTO_BALANCE]:
1721             needed_mem += bep[constants.BE_MEMORY]
1722         test = n_img.mfree < needed_mem
1723         self._ErrorIf(test, self.ENODEN1, node,
1724                       "not enough memory on to accommodate"
1725                       " failovers should peer node %s fail", prinode)
1726
1727   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1728                        master_files):
1729     """Verifies and computes the node required file checksums.
1730
1731     @type ninfo: L{objects.Node}
1732     @param ninfo: the node to check
1733     @param nresult: the remote results for the node
1734     @param file_list: required list of files
1735     @param local_cksum: dictionary of local files and their checksums
1736     @param master_files: list of files that only masters should have
1737
1738     """
1739     node = ninfo.name
1740     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1741
1742     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1743     test = not isinstance(remote_cksum, dict)
1744     _ErrorIf(test, self.ENODEFILECHECK, node,
1745              "node hasn't returned file checksum data")
1746     if test:
1747       return
1748
1749     for file_name in file_list:
1750       node_is_mc = ninfo.master_candidate
1751       must_have = (file_name not in master_files) or node_is_mc
1752       # missing
1753       test1 = file_name not in remote_cksum
1754       # invalid checksum
1755       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1756       # existing and good
1757       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1758       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1759                "file '%s' missing", file_name)
1760       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1761                "file '%s' has wrong checksum", file_name)
1762       # not candidate and this is not a must-have file
1763       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1764                "file '%s' should not exist on non master"
1765                " candidates (and the file is outdated)", file_name)
1766       # all good, except non-master/non-must have combination
1767       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1768                "file '%s' should not exist"
1769                " on non master candidates", file_name)
1770
1771   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1772                       drbd_map):
1773     """Verifies and the node DRBD status.
1774
1775     @type ninfo: L{objects.Node}
1776     @param ninfo: the node to check
1777     @param nresult: the remote results for the node
1778     @param instanceinfo: the dict of instances
1779     @param drbd_helper: the configured DRBD usermode helper
1780     @param drbd_map: the DRBD map as returned by
1781         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1782
1783     """
1784     node = ninfo.name
1785     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1786
1787     if drbd_helper:
1788       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1789       test = (helper_result == None)
1790       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1791                "no drbd usermode helper returned")
1792       if helper_result:
1793         status, payload = helper_result
1794         test = not status
1795         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1796                  "drbd usermode helper check unsuccessful: %s", payload)
1797         test = status and (payload != drbd_helper)
1798         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1799                  "wrong drbd usermode helper: %s", payload)
1800
1801     # compute the DRBD minors
1802     node_drbd = {}
1803     for minor, instance in drbd_map[node].items():
1804       test = instance not in instanceinfo
1805       _ErrorIf(test, self.ECLUSTERCFG, None,
1806                "ghost instance '%s' in temporary DRBD map", instance)
1807         # ghost instance should not be running, but otherwise we
1808         # don't give double warnings (both ghost instance and
1809         # unallocated minor in use)
1810       if test:
1811         node_drbd[minor] = (instance, False)
1812       else:
1813         instance = instanceinfo[instance]
1814         node_drbd[minor] = (instance.name, instance.admin_up)
1815
1816     # and now check them
1817     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1818     test = not isinstance(used_minors, (tuple, list))
1819     _ErrorIf(test, self.ENODEDRBD, node,
1820              "cannot parse drbd status file: %s", str(used_minors))
1821     if test:
1822       # we cannot check drbd status
1823       return
1824
1825     for minor, (iname, must_exist) in node_drbd.items():
1826       test = minor not in used_minors and must_exist
1827       _ErrorIf(test, self.ENODEDRBD, node,
1828                "drbd minor %d of instance %s is not active", minor, iname)
1829     for minor in used_minors:
1830       test = minor not in node_drbd
1831       _ErrorIf(test, self.ENODEDRBD, node,
1832                "unallocated drbd minor %d is in use", minor)
1833
1834   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1835     """Builds the node OS structures.
1836
1837     @type ninfo: L{objects.Node}
1838     @param ninfo: the node to check
1839     @param nresult: the remote results for the node
1840     @param nimg: the node image object
1841
1842     """
1843     node = ninfo.name
1844     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1845
1846     remote_os = nresult.get(constants.NV_OSLIST, None)
1847     test = (not isinstance(remote_os, list) or
1848             not compat.all(isinstance(v, list) and len(v) == 7
1849                            for v in remote_os))
1850
1851     _ErrorIf(test, self.ENODEOS, node,
1852              "node hasn't returned valid OS data")
1853
1854     nimg.os_fail = test
1855
1856     if test:
1857       return
1858
1859     os_dict = {}
1860
1861     for (name, os_path, status, diagnose,
1862          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1863
1864       if name not in os_dict:
1865         os_dict[name] = []
1866
1867       # parameters is a list of lists instead of list of tuples due to
1868       # JSON lacking a real tuple type, fix it:
1869       parameters = [tuple(v) for v in parameters]
1870       os_dict[name].append((os_path, status, diagnose,
1871                             set(variants), set(parameters), set(api_ver)))
1872
1873     nimg.oslist = os_dict
1874
1875   def _VerifyNodeOS(self, ninfo, nimg, base):
1876     """Verifies the node OS list.
1877
1878     @type ninfo: L{objects.Node}
1879     @param ninfo: the node to check
1880     @param nimg: the node image object
1881     @param base: the 'template' node we match against (e.g. from the master)
1882
1883     """
1884     node = ninfo.name
1885     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1886
1887     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1888
1889     for os_name, os_data in nimg.oslist.items():
1890       assert os_data, "Empty OS status for OS %s?!" % os_name
1891       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1892       _ErrorIf(not f_status, self.ENODEOS, node,
1893                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1894       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1895                "OS '%s' has multiple entries (first one shadows the rest): %s",
1896                os_name, utils.CommaJoin([v[0] for v in os_data]))
1897       # this will catched in backend too
1898       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1899                and not f_var, self.ENODEOS, node,
1900                "OS %s with API at least %d does not declare any variant",
1901                os_name, constants.OS_API_V15)
1902       # comparisons with the 'base' image
1903       test = os_name not in base.oslist
1904       _ErrorIf(test, self.ENODEOS, node,
1905                "Extra OS %s not present on reference node (%s)",
1906                os_name, base.name)
1907       if test:
1908         continue
1909       assert base.oslist[os_name], "Base node has empty OS status?"
1910       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1911       if not b_status:
1912         # base OS is invalid, skipping
1913         continue
1914       for kind, a, b in [("API version", f_api, b_api),
1915                          ("variants list", f_var, b_var),
1916                          ("parameters", f_param, b_param)]:
1917         _ErrorIf(a != b, self.ENODEOS, node,
1918                  "OS %s %s differs from reference node %s: %s vs. %s",
1919                  kind, os_name, base.name,
1920                  utils.CommaJoin(a), utils.CommaJoin(b))
1921
1922     # check any missing OSes
1923     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1924     _ErrorIf(missing, self.ENODEOS, node,
1925              "OSes present on reference node %s but missing on this node: %s",
1926              base.name, utils.CommaJoin(missing))
1927
1928   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1929     """Verifies and updates the node volume data.
1930
1931     This function will update a L{NodeImage}'s internal structures
1932     with data from the remote call.
1933
1934     @type ninfo: L{objects.Node}
1935     @param ninfo: the node to check
1936     @param nresult: the remote results for the node
1937     @param nimg: the node image object
1938     @param vg_name: the configured VG name
1939
1940     """
1941     node = ninfo.name
1942     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1943
1944     nimg.lvm_fail = True
1945     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1946     if vg_name is None:
1947       pass
1948     elif isinstance(lvdata, basestring):
1949       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1950                utils.SafeEncode(lvdata))
1951     elif not isinstance(lvdata, dict):
1952       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1953     else:
1954       nimg.volumes = lvdata
1955       nimg.lvm_fail = False
1956
1957   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1958     """Verifies and updates the node instance list.
1959
1960     If the listing was successful, then updates this node's instance
1961     list. Otherwise, it marks the RPC call as failed for the instance
1962     list key.
1963
1964     @type ninfo: L{objects.Node}
1965     @param ninfo: the node to check
1966     @param nresult: the remote results for the node
1967     @param nimg: the node image object
1968
1969     """
1970     idata = nresult.get(constants.NV_INSTANCELIST, None)
1971     test = not isinstance(idata, list)
1972     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1973                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1974     if test:
1975       nimg.hyp_fail = True
1976     else:
1977       nimg.instances = idata
1978
1979   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1980     """Verifies and computes a node information map
1981
1982     @type ninfo: L{objects.Node}
1983     @param ninfo: the node to check
1984     @param nresult: the remote results for the node
1985     @param nimg: the node image object
1986     @param vg_name: the configured VG name
1987
1988     """
1989     node = ninfo.name
1990     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1991
1992     # try to read free memory (from the hypervisor)
1993     hv_info = nresult.get(constants.NV_HVINFO, None)
1994     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1995     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1996     if not test:
1997       try:
1998         nimg.mfree = int(hv_info["memory_free"])
1999       except (ValueError, TypeError):
2000         _ErrorIf(True, self.ENODERPC, node,
2001                  "node returned invalid nodeinfo, check hypervisor")
2002
2003     # FIXME: devise a free space model for file based instances as well
2004     if vg_name is not None:
2005       test = (constants.NV_VGLIST not in nresult or
2006               vg_name not in nresult[constants.NV_VGLIST])
2007       _ErrorIf(test, self.ENODELVM, node,
2008                "node didn't return data for the volume group '%s'"
2009                " - it is either missing or broken", vg_name)
2010       if not test:
2011         try:
2012           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2013         except (ValueError, TypeError):
2014           _ErrorIf(True, self.ENODERPC, node,
2015                    "node returned invalid LVM info, check LVM status")
2016
2017   def BuildHooksEnv(self):
2018     """Build hooks env.
2019
2020     Cluster-Verify hooks just ran in the post phase and their failure makes
2021     the output be logged in the verify output and the verification to fail.
2022
2023     """
2024     all_nodes = self.cfg.GetNodeList()
2025     env = {
2026       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2027       }
2028     for node in self.cfg.GetAllNodesInfo().values():
2029       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2030
2031     return env, [], all_nodes
2032
2033   def Exec(self, feedback_fn):
2034     """Verify integrity of cluster, performing various test on nodes.
2035
2036     """
2037     self.bad = False
2038     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2039     verbose = self.op.verbose
2040     self._feedback_fn = feedback_fn
2041     feedback_fn("* Verifying global settings")
2042     for msg in self.cfg.VerifyConfig():
2043       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2044
2045     # Check the cluster certificates
2046     for cert_filename in constants.ALL_CERT_FILES:
2047       (errcode, msg) = _VerifyCertificate(cert_filename)
2048       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2049
2050     vg_name = self.cfg.GetVGName()
2051     drbd_helper = self.cfg.GetDRBDHelper()
2052     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2053     cluster = self.cfg.GetClusterInfo()
2054     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2055     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2056     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2057     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2058                         for iname in instancelist)
2059     i_non_redundant = [] # Non redundant instances
2060     i_non_a_balanced = [] # Non auto-balanced instances
2061     n_offline = 0 # Count of offline nodes
2062     n_drained = 0 # Count of nodes being drained
2063     node_vol_should = {}
2064
2065     # FIXME: verify OS list
2066     # do local checksums
2067     master_files = [constants.CLUSTER_CONF_FILE]
2068     master_node = self.master_node = self.cfg.GetMasterNode()
2069     master_ip = self.cfg.GetMasterIP()
2070
2071     file_names = ssconf.SimpleStore().GetFileList()
2072     file_names.extend(constants.ALL_CERT_FILES)
2073     file_names.extend(master_files)
2074     if cluster.modify_etc_hosts:
2075       file_names.append(constants.ETC_HOSTS)
2076
2077     local_checksums = utils.FingerprintFiles(file_names)
2078
2079     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2080     node_verify_param = {
2081       constants.NV_FILELIST: file_names,
2082       constants.NV_NODELIST: [node.name for node in nodeinfo
2083                               if not node.offline],
2084       constants.NV_HYPERVISOR: hypervisors,
2085       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2086                                   node.secondary_ip) for node in nodeinfo
2087                                  if not node.offline],
2088       constants.NV_INSTANCELIST: hypervisors,
2089       constants.NV_VERSION: None,
2090       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2091       constants.NV_NODESETUP: None,
2092       constants.NV_TIME: None,
2093       constants.NV_MASTERIP: (master_node, master_ip),
2094       constants.NV_OSLIST: None,
2095       }
2096
2097     if vg_name is not None:
2098       node_verify_param[constants.NV_VGLIST] = None
2099       node_verify_param[constants.NV_LVLIST] = vg_name
2100       node_verify_param[constants.NV_PVLIST] = [vg_name]
2101       node_verify_param[constants.NV_DRBDLIST] = None
2102
2103     if drbd_helper:
2104       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2105
2106     # Build our expected cluster state
2107     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2108                                                  name=node.name))
2109                       for node in nodeinfo)
2110
2111     for instance in instancelist:
2112       inst_config = instanceinfo[instance]
2113
2114       for nname in inst_config.all_nodes:
2115         if nname not in node_image:
2116           # ghost node
2117           gnode = self.NodeImage(name=nname)
2118           gnode.ghost = True
2119           node_image[nname] = gnode
2120
2121       inst_config.MapLVsByNode(node_vol_should)
2122
2123       pnode = inst_config.primary_node
2124       node_image[pnode].pinst.append(instance)
2125
2126       for snode in inst_config.secondary_nodes:
2127         nimg = node_image[snode]
2128         nimg.sinst.append(instance)
2129         if pnode not in nimg.sbp:
2130           nimg.sbp[pnode] = []
2131         nimg.sbp[pnode].append(instance)
2132
2133     # At this point, we have the in-memory data structures complete,
2134     # except for the runtime information, which we'll gather next
2135
2136     # Due to the way our RPC system works, exact response times cannot be
2137     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2138     # time before and after executing the request, we can at least have a time
2139     # window.
2140     nvinfo_starttime = time.time()
2141     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2142                                            self.cfg.GetClusterName())
2143     nvinfo_endtime = time.time()
2144
2145     all_drbd_map = self.cfg.ComputeDRBDMap()
2146
2147     feedback_fn("* Verifying node status")
2148
2149     refos_img = None
2150
2151     for node_i in nodeinfo:
2152       node = node_i.name
2153       nimg = node_image[node]
2154
2155       if node_i.offline:
2156         if verbose:
2157           feedback_fn("* Skipping offline node %s" % (node,))
2158         n_offline += 1
2159         continue
2160
2161       if node == master_node:
2162         ntype = "master"
2163       elif node_i.master_candidate:
2164         ntype = "master candidate"
2165       elif node_i.drained:
2166         ntype = "drained"
2167         n_drained += 1
2168       else:
2169         ntype = "regular"
2170       if verbose:
2171         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2172
2173       msg = all_nvinfo[node].fail_msg
2174       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2175       if msg:
2176         nimg.rpc_fail = True
2177         continue
2178
2179       nresult = all_nvinfo[node].payload
2180
2181       nimg.call_ok = self._VerifyNode(node_i, nresult)
2182       self._VerifyNodeNetwork(node_i, nresult)
2183       self._VerifyNodeLVM(node_i, nresult, vg_name)
2184       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2185                             master_files)
2186       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2187                            all_drbd_map)
2188       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2189
2190       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2191       self._UpdateNodeInstances(node_i, nresult, nimg)
2192       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2193       self._UpdateNodeOS(node_i, nresult, nimg)
2194       if not nimg.os_fail:
2195         if refos_img is None:
2196           refos_img = nimg
2197         self._VerifyNodeOS(node_i, nimg, refos_img)
2198
2199     feedback_fn("* Verifying instance status")
2200     for instance in instancelist:
2201       if verbose:
2202         feedback_fn("* Verifying instance %s" % instance)
2203       inst_config = instanceinfo[instance]
2204       self._VerifyInstance(instance, inst_config, node_image)
2205       inst_nodes_offline = []
2206
2207       pnode = inst_config.primary_node
2208       pnode_img = node_image[pnode]
2209       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2210                self.ENODERPC, pnode, "instance %s, connection to"
2211                " primary node failed", instance)
2212
2213       if pnode_img.offline:
2214         inst_nodes_offline.append(pnode)
2215
2216       # If the instance is non-redundant we cannot survive losing its primary
2217       # node, so we are not N+1 compliant. On the other hand we have no disk
2218       # templates with more than one secondary so that situation is not well
2219       # supported either.
2220       # FIXME: does not support file-backed instances
2221       if not inst_config.secondary_nodes:
2222         i_non_redundant.append(instance)
2223       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2224                instance, "instance has multiple secondary nodes: %s",
2225                utils.CommaJoin(inst_config.secondary_nodes),
2226                code=self.ETYPE_WARNING)
2227
2228       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2229         i_non_a_balanced.append(instance)
2230
2231       for snode in inst_config.secondary_nodes:
2232         s_img = node_image[snode]
2233         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2234                  "instance %s, connection to secondary node failed", instance)
2235
2236         if s_img.offline:
2237           inst_nodes_offline.append(snode)
2238
2239       # warn that the instance lives on offline nodes
2240       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2241                "instance lives on offline node(s) %s",
2242                utils.CommaJoin(inst_nodes_offline))
2243       # ... or ghost nodes
2244       for node in inst_config.all_nodes:
2245         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2246                  "instance lives on ghost node %s", node)
2247
2248     feedback_fn("* Verifying orphan volumes")
2249     reserved = utils.FieldSet(*cluster.reserved_lvs)
2250     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2251
2252     feedback_fn("* Verifying orphan instances")
2253     self._VerifyOrphanInstances(instancelist, node_image)
2254
2255     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2256       feedback_fn("* Verifying N+1 Memory redundancy")
2257       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2258
2259     feedback_fn("* Other Notes")
2260     if i_non_redundant:
2261       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2262                   % len(i_non_redundant))
2263
2264     if i_non_a_balanced:
2265       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2266                   % len(i_non_a_balanced))
2267
2268     if n_offline:
2269       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2270
2271     if n_drained:
2272       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2273
2274     return not self.bad
2275
2276   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2277     """Analyze the post-hooks' result
2278
2279     This method analyses the hook result, handles it, and sends some
2280     nicely-formatted feedback back to the user.
2281
2282     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2283         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2284     @param hooks_results: the results of the multi-node hooks rpc call
2285     @param feedback_fn: function used send feedback back to the caller
2286     @param lu_result: previous Exec result
2287     @return: the new Exec result, based on the previous result
2288         and hook results
2289
2290     """
2291     # We only really run POST phase hooks, and are only interested in
2292     # their results
2293     if phase == constants.HOOKS_PHASE_POST:
2294       # Used to change hooks' output to proper indentation
2295       indent_re = re.compile('^', re.M)
2296       feedback_fn("* Hooks Results")
2297       assert hooks_results, "invalid result from hooks"
2298
2299       for node_name in hooks_results:
2300         res = hooks_results[node_name]
2301         msg = res.fail_msg
2302         test = msg and not res.offline
2303         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2304                       "Communication failure in hooks execution: %s", msg)
2305         if res.offline or msg:
2306           # No need to investigate payload if node is offline or gave an error.
2307           # override manually lu_result here as _ErrorIf only
2308           # overrides self.bad
2309           lu_result = 1
2310           continue
2311         for script, hkr, output in res.payload:
2312           test = hkr == constants.HKR_FAIL
2313           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2314                         "Script %s failed, output:", script)
2315           if test:
2316             output = indent_re.sub('      ', output)
2317             feedback_fn("%s" % output)
2318             lu_result = 0
2319
2320       return lu_result
2321
2322
2323 class LUVerifyDisks(NoHooksLU):
2324   """Verifies the cluster disks status.
2325
2326   """
2327   REQ_BGL = False
2328
2329   def ExpandNames(self):
2330     self.needed_locks = {
2331       locking.LEVEL_NODE: locking.ALL_SET,
2332       locking.LEVEL_INSTANCE: locking.ALL_SET,
2333     }
2334     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2335
2336   def Exec(self, feedback_fn):
2337     """Verify integrity of cluster disks.
2338
2339     @rtype: tuple of three items
2340     @return: a tuple of (dict of node-to-node_error, list of instances
2341         which need activate-disks, dict of instance: (node, volume) for
2342         missing volumes
2343
2344     """
2345     result = res_nodes, res_instances, res_missing = {}, [], {}
2346
2347     vg_name = self.cfg.GetVGName()
2348     nodes = utils.NiceSort(self.cfg.GetNodeList())
2349     instances = [self.cfg.GetInstanceInfo(name)
2350                  for name in self.cfg.GetInstanceList()]
2351
2352     nv_dict = {}
2353     for inst in instances:
2354       inst_lvs = {}
2355       if (not inst.admin_up or
2356           inst.disk_template not in constants.DTS_NET_MIRROR):
2357         continue
2358       inst.MapLVsByNode(inst_lvs)
2359       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2360       for node, vol_list in inst_lvs.iteritems():
2361         for vol in vol_list:
2362           nv_dict[(node, vol)] = inst
2363
2364     if not nv_dict:
2365       return result
2366
2367     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2368
2369     for node in nodes:
2370       # node_volume
2371       node_res = node_lvs[node]
2372       if node_res.offline:
2373         continue
2374       msg = node_res.fail_msg
2375       if msg:
2376         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2377         res_nodes[node] = msg
2378         continue
2379
2380       lvs = node_res.payload
2381       for lv_name, (_, _, lv_online) in lvs.items():
2382         inst = nv_dict.pop((node, lv_name), None)
2383         if (not lv_online and inst is not None
2384             and inst.name not in res_instances):
2385           res_instances.append(inst.name)
2386
2387     # any leftover items in nv_dict are missing LVs, let's arrange the
2388     # data better
2389     for key, inst in nv_dict.iteritems():
2390       if inst.name not in res_missing:
2391         res_missing[inst.name] = []
2392       res_missing[inst.name].append(key)
2393
2394     return result
2395
2396
2397 class LURepairDiskSizes(NoHooksLU):
2398   """Verifies the cluster disks sizes.
2399
2400   """
2401   _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2402   REQ_BGL = False
2403
2404   def ExpandNames(self):
2405     if self.op.instances:
2406       self.wanted_names = []
2407       for name in self.op.instances:
2408         full_name = _ExpandInstanceName(self.cfg, name)
2409         self.wanted_names.append(full_name)
2410       self.needed_locks = {
2411         locking.LEVEL_NODE: [],
2412         locking.LEVEL_INSTANCE: self.wanted_names,
2413         }
2414       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2415     else:
2416       self.wanted_names = None
2417       self.needed_locks = {
2418         locking.LEVEL_NODE: locking.ALL_SET,
2419         locking.LEVEL_INSTANCE: locking.ALL_SET,
2420         }
2421     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2422
2423   def DeclareLocks(self, level):
2424     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2425       self._LockInstancesNodes(primary_only=True)
2426
2427   def CheckPrereq(self):
2428     """Check prerequisites.
2429
2430     This only checks the optional instance list against the existing names.
2431
2432     """
2433     if self.wanted_names is None:
2434       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2435
2436     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2437                              in self.wanted_names]
2438
2439   def _EnsureChildSizes(self, disk):
2440     """Ensure children of the disk have the needed disk size.
2441
2442     This is valid mainly for DRBD8 and fixes an issue where the
2443     children have smaller disk size.
2444
2445     @param disk: an L{ganeti.objects.Disk} object
2446
2447     """
2448     if disk.dev_type == constants.LD_DRBD8:
2449       assert disk.children, "Empty children for DRBD8?"
2450       fchild = disk.children[0]
2451       mismatch = fchild.size < disk.size
2452       if mismatch:
2453         self.LogInfo("Child disk has size %d, parent %d, fixing",
2454                      fchild.size, disk.size)
2455         fchild.size = disk.size
2456
2457       # and we recurse on this child only, not on the metadev
2458       return self._EnsureChildSizes(fchild) or mismatch
2459     else:
2460       return False
2461
2462   def Exec(self, feedback_fn):
2463     """Verify the size of cluster disks.
2464
2465     """
2466     # TODO: check child disks too
2467     # TODO: check differences in size between primary/secondary nodes
2468     per_node_disks = {}
2469     for instance in self.wanted_instances:
2470       pnode = instance.primary_node
2471       if pnode not in per_node_disks:
2472         per_node_disks[pnode] = []
2473       for idx, disk in enumerate(instance.disks):
2474         per_node_disks[pnode].append((instance, idx, disk))
2475
2476     changed = []
2477     for node, dskl in per_node_disks.items():
2478       newl = [v[2].Copy() for v in dskl]
2479       for dsk in newl:
2480         self.cfg.SetDiskID(dsk, node)
2481       result = self.rpc.call_blockdev_getsizes(node, newl)
2482       if result.fail_msg:
2483         self.LogWarning("Failure in blockdev_getsizes call to node"
2484                         " %s, ignoring", node)
2485         continue
2486       if len(result.data) != len(dskl):
2487         self.LogWarning("Invalid result from node %s, ignoring node results",
2488                         node)
2489         continue
2490       for ((instance, idx, disk), size) in zip(dskl, result.data):
2491         if size is None:
2492           self.LogWarning("Disk %d of instance %s did not return size"
2493                           " information, ignoring", idx, instance.name)
2494           continue
2495         if not isinstance(size, (int, long)):
2496           self.LogWarning("Disk %d of instance %s did not return valid"
2497                           " size information, ignoring", idx, instance.name)
2498           continue
2499         size = size >> 20
2500         if size != disk.size:
2501           self.LogInfo("Disk %d of instance %s has mismatched size,"
2502                        " correcting: recorded %d, actual %d", idx,
2503                        instance.name, disk.size, size)
2504           disk.size = size
2505           self.cfg.Update(instance, feedback_fn)
2506           changed.append((instance.name, idx, size))
2507         if self._EnsureChildSizes(disk):
2508           self.cfg.Update(instance, feedback_fn)
2509           changed.append((instance.name, idx, disk.size))
2510     return changed
2511
2512
2513 class LURenameCluster(LogicalUnit):
2514   """Rename the cluster.
2515
2516   """
2517   HPATH = "cluster-rename"
2518   HTYPE = constants.HTYPE_CLUSTER
2519   _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2520
2521   def BuildHooksEnv(self):
2522     """Build hooks env.
2523
2524     """
2525     env = {
2526       "OP_TARGET": self.cfg.GetClusterName(),
2527       "NEW_NAME": self.op.name,
2528       }
2529     mn = self.cfg.GetMasterNode()
2530     all_nodes = self.cfg.GetNodeList()
2531     return env, [mn], all_nodes
2532
2533   def CheckPrereq(self):
2534     """Verify that the passed name is a valid one.
2535
2536     """
2537     hostname = netutils.GetHostname(name=self.op.name,
2538                                     family=self.cfg.GetPrimaryIPFamily())
2539
2540     new_name = hostname.name
2541     self.ip = new_ip = hostname.ip
2542     old_name = self.cfg.GetClusterName()
2543     old_ip = self.cfg.GetMasterIP()
2544     if new_name == old_name and new_ip == old_ip:
2545       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2546                                  " cluster has changed",
2547                                  errors.ECODE_INVAL)
2548     if new_ip != old_ip:
2549       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2550         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2551                                    " reachable on the network" %
2552                                    new_ip, errors.ECODE_NOTUNIQUE)
2553
2554     self.op.name = new_name
2555
2556   def Exec(self, feedback_fn):
2557     """Rename the cluster.
2558
2559     """
2560     clustername = self.op.name
2561     ip = self.ip
2562
2563     # shutdown the master IP
2564     master = self.cfg.GetMasterNode()
2565     result = self.rpc.call_node_stop_master(master, False)
2566     result.Raise("Could not disable the master role")
2567
2568     try:
2569       cluster = self.cfg.GetClusterInfo()
2570       cluster.cluster_name = clustername
2571       cluster.master_ip = ip
2572       self.cfg.Update(cluster, feedback_fn)
2573
2574       # update the known hosts file
2575       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2576       node_list = self.cfg.GetNodeList()
2577       try:
2578         node_list.remove(master)
2579       except ValueError:
2580         pass
2581       result = self.rpc.call_upload_file(node_list,
2582                                          constants.SSH_KNOWN_HOSTS_FILE)
2583       for to_node, to_result in result.iteritems():
2584         msg = to_result.fail_msg
2585         if msg:
2586           msg = ("Copy of file %s to node %s failed: %s" %
2587                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2588           self.proc.LogWarning(msg)
2589
2590     finally:
2591       result = self.rpc.call_node_start_master(master, False, False)
2592       msg = result.fail_msg
2593       if msg:
2594         self.LogWarning("Could not re-enable the master role on"
2595                         " the master, please restart manually: %s", msg)
2596
2597     return clustername
2598
2599
2600 class LUSetClusterParams(LogicalUnit):
2601   """Change the parameters of the cluster.
2602
2603   """
2604   HPATH = "cluster-modify"
2605   HTYPE = constants.HTYPE_CLUSTER
2606   _OP_PARAMS = [
2607     ("vg_name", None, _TMaybeString),
2608     ("enabled_hypervisors", None,
2609      _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2610     ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2611     ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2612     ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2613     ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2614     ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2615     ("uid_pool", None, _NoType),
2616     ("add_uids", None, _NoType),
2617     ("remove_uids", None, _NoType),
2618     ("maintain_node_health", None, _TMaybeBool),
2619     ("nicparams", None, _TOr(_TDict, _TNone)),
2620     ("drbd_helper", None, _TOr(_TString, _TNone)),
2621     ("default_iallocator", None, _TMaybeString),
2622     ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2623     ("hidden_os", None, _TOr(_TListOf(\
2624           _TAnd(_TList,
2625                 _TIsLength(2),
2626                 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2627           _TNone)),
2628     ("blacklisted_os", None, _TOr(_TListOf(\
2629           _TAnd(_TList,
2630                 _TIsLength(2),
2631                 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2632           _TNone)),
2633     ]
2634   REQ_BGL = False
2635
2636   def CheckArguments(self):
2637     """Check parameters
2638
2639     """
2640     if self.op.uid_pool:
2641       uidpool.CheckUidPool(self.op.uid_pool)
2642
2643     if self.op.add_uids:
2644       uidpool.CheckUidPool(self.op.add_uids)
2645
2646     if self.op.remove_uids:
2647       uidpool.CheckUidPool(self.op.remove_uids)
2648
2649   def ExpandNames(self):
2650     # FIXME: in the future maybe other cluster params won't require checking on
2651     # all nodes to be modified.
2652     self.needed_locks = {
2653       locking.LEVEL_NODE: locking.ALL_SET,
2654     }
2655     self.share_locks[locking.LEVEL_NODE] = 1
2656
2657   def BuildHooksEnv(self):
2658     """Build hooks env.
2659
2660     """
2661     env = {
2662       "OP_TARGET": self.cfg.GetClusterName(),
2663       "NEW_VG_NAME": self.op.vg_name,
2664       }
2665     mn = self.cfg.GetMasterNode()
2666     return env, [mn], [mn]
2667
2668   def CheckPrereq(self):
2669     """Check prerequisites.
2670
2671     This checks whether the given params don't conflict and
2672     if the given volume group is valid.
2673
2674     """
2675     if self.op.vg_name is not None and not self.op.vg_name:
2676       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2677         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2678                                    " instances exist", errors.ECODE_INVAL)
2679
2680     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2681       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2682         raise errors.OpPrereqError("Cannot disable drbd helper while"
2683                                    " drbd-based instances exist",
2684                                    errors.ECODE_INVAL)
2685
2686     node_list = self.acquired_locks[locking.LEVEL_NODE]
2687
2688     # if vg_name not None, checks given volume group on all nodes
2689     if self.op.vg_name:
2690       vglist = self.rpc.call_vg_list(node_list)
2691       for node in node_list:
2692         msg = vglist[node].fail_msg
2693         if msg:
2694           # ignoring down node
2695           self.LogWarning("Error while gathering data on node %s"
2696                           " (ignoring node): %s", node, msg)
2697           continue
2698         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2699                                               self.op.vg_name,
2700                                               constants.MIN_VG_SIZE)
2701         if vgstatus:
2702           raise errors.OpPrereqError("Error on node '%s': %s" %
2703                                      (node, vgstatus), errors.ECODE_ENVIRON)
2704
2705     if self.op.drbd_helper:
2706       # checks given drbd helper on all nodes
2707       helpers = self.rpc.call_drbd_helper(node_list)
2708       for node in node_list:
2709         ninfo = self.cfg.GetNodeInfo(node)
2710         if ninfo.offline:
2711           self.LogInfo("Not checking drbd helper on offline node %s", node)
2712           continue
2713         msg = helpers[node].fail_msg
2714         if msg:
2715           raise errors.OpPrereqError("Error checking drbd helper on node"
2716                                      " '%s': %s" % (node, msg),
2717                                      errors.ECODE_ENVIRON)
2718         node_helper = helpers[node].payload
2719         if node_helper != self.op.drbd_helper:
2720           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2721                                      (node, node_helper), errors.ECODE_ENVIRON)
2722
2723     self.cluster = cluster = self.cfg.GetClusterInfo()
2724     # validate params changes
2725     if self.op.beparams:
2726       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2727       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2728
2729     if self.op.nicparams:
2730       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2731       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2732       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2733       nic_errors = []
2734
2735       # check all instances for consistency
2736       for instance in self.cfg.GetAllInstancesInfo().values():
2737         for nic_idx, nic in enumerate(instance.nics):
2738           params_copy = copy.deepcopy(nic.nicparams)
2739           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2740
2741           # check parameter syntax
2742           try:
2743             objects.NIC.CheckParameterSyntax(params_filled)
2744           except errors.ConfigurationError, err:
2745             nic_errors.append("Instance %s, nic/%d: %s" %
2746                               (instance.name, nic_idx, err))
2747
2748           # if we're moving instances to routed, check that they have an ip
2749           target_mode = params_filled[constants.NIC_MODE]
2750           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2751             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2752                               (instance.name, nic_idx))
2753       if nic_errors:
2754         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2755                                    "\n".join(nic_errors))
2756
2757     # hypervisor list/parameters
2758     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2759     if self.op.hvparams:
2760       for hv_name, hv_dict in self.op.hvparams.items():
2761         if hv_name not in self.new_hvparams:
2762           self.new_hvparams[hv_name] = hv_dict
2763         else:
2764           self.new_hvparams[hv_name].update(hv_dict)
2765
2766     # os hypervisor parameters
2767     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2768     if self.op.os_hvp:
2769       for os_name, hvs in self.op.os_hvp.items():
2770         if os_name not in self.new_os_hvp:
2771           self.new_os_hvp[os_name] = hvs
2772         else:
2773           for hv_name, hv_dict in hvs.items():
2774             if hv_name not in self.new_os_hvp[os_name]:
2775               self.new_os_hvp[os_name][hv_name] = hv_dict
2776             else:
2777               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2778
2779     # os parameters
2780     self.new_osp = objects.FillDict(cluster.osparams, {})
2781     if self.op.osparams:
2782       for os_name, osp in self.op.osparams.items():
2783         if os_name not in self.new_osp:
2784           self.new_osp[os_name] = {}
2785
2786         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2787                                                   use_none=True)
2788
2789         if not self.new_osp[os_name]:
2790           # we removed all parameters
2791           del self.new_osp[os_name]
2792         else:
2793           # check the parameter validity (remote check)
2794           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2795                          os_name, self.new_osp[os_name])
2796
2797     # changes to the hypervisor list
2798     if self.op.enabled_hypervisors is not None:
2799       self.hv_list = self.op.enabled_hypervisors
2800       for hv in self.hv_list:
2801         # if the hypervisor doesn't already exist in the cluster
2802         # hvparams, we initialize it to empty, and then (in both
2803         # cases) we make sure to fill the defaults, as we might not
2804         # have a complete defaults list if the hypervisor wasn't
2805         # enabled before
2806         if hv not in new_hvp:
2807           new_hvp[hv] = {}
2808         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2809         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2810     else:
2811       self.hv_list = cluster.enabled_hypervisors
2812
2813     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2814       # either the enabled list has changed, or the parameters have, validate
2815       for hv_name, hv_params in self.new_hvparams.items():
2816         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2817             (self.op.enabled_hypervisors and
2818              hv_name in self.op.enabled_hypervisors)):
2819           # either this is a new hypervisor, or its parameters have changed
2820           hv_class = hypervisor.GetHypervisor(hv_name)
2821           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2822           hv_class.CheckParameterSyntax(hv_params)
2823           _CheckHVParams(self, node_list, hv_name, hv_params)
2824
2825     if self.op.os_hvp:
2826       # no need to check any newly-enabled hypervisors, since the
2827       # defaults have already been checked in the above code-block
2828       for os_name, os_hvp in self.new_os_hvp.items():
2829         for hv_name, hv_params in os_hvp.items():
2830           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2831           # we need to fill in the new os_hvp on top of the actual hv_p
2832           cluster_defaults = self.new_hvparams.get(hv_name, {})
2833           new_osp = objects.FillDict(cluster_defaults, hv_params)
2834           hv_class = hypervisor.GetHypervisor(hv_name)
2835           hv_class.CheckParameterSyntax(new_osp)
2836           _CheckHVParams(self, node_list, hv_name, new_osp)
2837
2838     if self.op.default_iallocator:
2839       alloc_script = utils.FindFile(self.op.default_iallocator,
2840                                     constants.IALLOCATOR_SEARCH_PATH,
2841                                     os.path.isfile)
2842       if alloc_script is None:
2843         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2844                                    " specified" % self.op.default_iallocator,
2845                                    errors.ECODE_INVAL)
2846
2847   def Exec(self, feedback_fn):
2848     """Change the parameters of the cluster.
2849
2850     """
2851     if self.op.vg_name is not None:
2852       new_volume = self.op.vg_name
2853       if not new_volume:
2854         new_volume = None
2855       if new_volume != self.cfg.GetVGName():
2856         self.cfg.SetVGName(new_volume)
2857       else:
2858         feedback_fn("Cluster LVM configuration already in desired"
2859                     " state, not changing")
2860     if self.op.drbd_helper is not None:
2861       new_helper = self.op.drbd_helper
2862       if not new_helper:
2863         new_helper = None
2864       if new_helper != self.cfg.GetDRBDHelper():
2865         self.cfg.SetDRBDHelper(new_helper)
2866       else:
2867         feedback_fn("Cluster DRBD helper already in desired state,"
2868                     " not changing")
2869     if self.op.hvparams:
2870       self.cluster.hvparams = self.new_hvparams
2871     if self.op.os_hvp:
2872       self.cluster.os_hvp = self.new_os_hvp
2873     if self.op.enabled_hypervisors is not None:
2874       self.cluster.hvparams = self.new_hvparams
2875       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2876     if self.op.beparams:
2877       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2878     if self.op.nicparams:
2879       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2880     if self.op.osparams:
2881       self.cluster.osparams = self.new_osp
2882
2883     if self.op.candidate_pool_size is not None:
2884       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2885       # we need to update the pool size here, otherwise the save will fail
2886       _AdjustCandidatePool(self, [])
2887
2888     if self.op.maintain_node_health is not None:
2889       self.cluster.maintain_node_health = self.op.maintain_node_health
2890
2891     if self.op.add_uids is not None:
2892       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2893
2894     if self.op.remove_uids is not None:
2895       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2896
2897     if self.op.uid_pool is not None:
2898       self.cluster.uid_pool = self.op.uid_pool
2899
2900     if self.op.default_iallocator is not None:
2901       self.cluster.default_iallocator = self.op.default_iallocator
2902
2903     if self.op.reserved_lvs is not None:
2904       self.cluster.reserved_lvs = self.op.reserved_lvs
2905
2906     def helper_os(aname, mods, desc):
2907       desc += " OS list"
2908       lst = getattr(self.cluster, aname)
2909       for key, val in mods:
2910         if key == constants.DDM_ADD:
2911           if val in lst:
2912             feedback_fn("OS %s already in %s, ignoring", val, desc)
2913           else:
2914             lst.append(val)
2915         elif key == constants.DDM_REMOVE:
2916           if val in lst:
2917             lst.remove(val)
2918           else:
2919             feedback_fn("OS %s not found in %s, ignoring", val, desc)
2920         else:
2921           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2922
2923     if self.op.hidden_os:
2924       helper_os("hidden_os", self.op.hidden_os, "hidden")
2925
2926     if self.op.blacklisted_os:
2927       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2928
2929     self.cfg.Update(self.cluster, feedback_fn)
2930
2931
2932 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2933   """Distribute additional files which are part of the cluster configuration.
2934
2935   ConfigWriter takes care of distributing the config and ssconf files, but
2936   there are more files which should be distributed to all nodes. This function
2937   makes sure those are copied.
2938
2939   @param lu: calling logical unit
2940   @param additional_nodes: list of nodes not in the config to distribute to
2941
2942   """
2943   # 1. Gather target nodes
2944   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2945   dist_nodes = lu.cfg.GetOnlineNodeList()
2946   if additional_nodes is not None:
2947     dist_nodes.extend(additional_nodes)
2948   if myself.name in dist_nodes:
2949     dist_nodes.remove(myself.name)
2950
2951   # 2. Gather files to distribute
2952   dist_files = set([constants.ETC_HOSTS,
2953                     constants.SSH_KNOWN_HOSTS_FILE,
2954                     constants.RAPI_CERT_FILE,
2955                     constants.RAPI_USERS_FILE,
2956                     constants.CONFD_HMAC_KEY,
2957                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2958                    ])
2959
2960   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2961   for hv_name in enabled_hypervisors:
2962     hv_class = hypervisor.GetHypervisor(hv_name)
2963     dist_files.update(hv_class.GetAncillaryFiles())
2964
2965   # 3. Perform the files upload
2966   for fname in dist_files:
2967     if os.path.exists(fname):
2968       result = lu.rpc.call_upload_file(dist_nodes, fname)
2969       for to_node, to_result in result.items():
2970         msg = to_result.fail_msg
2971         if msg:
2972           msg = ("Copy of file %s to node %s failed: %s" %
2973                  (fname, to_node, msg))
2974           lu.proc.LogWarning(msg)
2975
2976
2977 class LURedistributeConfig(NoHooksLU):
2978   """Force the redistribution of cluster configuration.
2979
2980   This is a very simple LU.
2981
2982   """
2983   REQ_BGL = False
2984
2985   def ExpandNames(self):
2986     self.needed_locks = {
2987       locking.LEVEL_NODE: locking.ALL_SET,
2988     }
2989     self.share_locks[locking.LEVEL_NODE] = 1
2990
2991   def Exec(self, feedback_fn):
2992     """Redistribute the configuration.
2993
2994     """
2995     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2996     _RedistributeAncillaryFiles(self)
2997
2998
2999 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3000   """Sleep and poll for an instance's disk to sync.
3001
3002   """
3003   if not instance.disks or disks is not None and not disks:
3004     return True
3005
3006   disks = _ExpandCheckDisks(instance, disks)
3007
3008   if not oneshot:
3009     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3010
3011   node = instance.primary_node
3012
3013   for dev in disks:
3014     lu.cfg.SetDiskID(dev, node)
3015
3016   # TODO: Convert to utils.Retry
3017
3018   retries = 0
3019   degr_retries = 10 # in seconds, as we sleep 1 second each time
3020   while True:
3021     max_time = 0
3022     done = True
3023     cumul_degraded = False
3024     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3025     msg = rstats.fail_msg
3026     if msg:
3027       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3028       retries += 1
3029       if retries >= 10:
3030         raise errors.RemoteError("Can't contact node %s for mirror data,"
3031                                  " aborting." % node)
3032       time.sleep(6)
3033       continue
3034     rstats = rstats.payload
3035     retries = 0
3036     for i, mstat in enumerate(rstats):
3037       if mstat is None:
3038         lu.LogWarning("Can't compute data for node %s/%s",
3039                            node, disks[i].iv_name)
3040         continue
3041
3042       cumul_degraded = (cumul_degraded or
3043                         (mstat.is_degraded and mstat.sync_percent is None))
3044       if mstat.sync_percent is not None:
3045         done = False
3046         if mstat.estimated_time is not None:
3047           rem_time = ("%s remaining (estimated)" %
3048                       utils.FormatSeconds(mstat.estimated_time))
3049           max_time = mstat.estimated_time
3050         else:
3051           rem_time = "no time estimate"
3052         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3053                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3054
3055     # if we're done but degraded, let's do a few small retries, to
3056     # make sure we see a stable and not transient situation; therefore
3057     # we force restart of the loop
3058     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3059       logging.info("Degraded disks found, %d retries left", degr_retries)
3060       degr_retries -= 1
3061       time.sleep(1)
3062       continue
3063
3064     if done or oneshot:
3065       break
3066
3067     time.sleep(min(60, max_time))
3068
3069   if done:
3070     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3071   return not cumul_degraded
3072
3073
3074 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3075   """Check that mirrors are not degraded.
3076
3077   The ldisk parameter, if True, will change the test from the
3078   is_degraded attribute (which represents overall non-ok status for
3079   the device(s)) to the ldisk (representing the local storage status).
3080
3081   """
3082   lu.cfg.SetDiskID(dev, node)
3083
3084   result = True
3085
3086   if on_primary or dev.AssembleOnSecondary():
3087     rstats = lu.rpc.call_blockdev_find(node, dev)
3088     msg = rstats.fail_msg
3089     if msg:
3090       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3091       result = False
3092     elif not rstats.payload:
3093       lu.LogWarning("Can't find disk on node %s", node)
3094       result = False
3095     else:
3096       if ldisk:
3097         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3098       else:
3099         result = result and not rstats.payload.is_degraded
3100
3101   if dev.children:
3102     for child in dev.children:
3103       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3104
3105   return result
3106
3107
3108 class LUDiagnoseOS(NoHooksLU):
3109   """Logical unit for OS diagnose/query.
3110
3111   """
3112   _OP_PARAMS = [
3113     _POutputFields,
3114     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3115     ]
3116   REQ_BGL = False
3117   _HID = "hidden"
3118   _BLK = "blacklisted"
3119   _VLD = "valid"
3120   _FIELDS_STATIC = utils.FieldSet()
3121   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3122                                    "parameters", "api_versions", _HID, _BLK)
3123
3124   def CheckArguments(self):
3125     if self.op.names:
3126       raise errors.OpPrereqError("Selective OS query not supported",
3127                                  errors.ECODE_INVAL)
3128
3129     _CheckOutputFields(static=self._FIELDS_STATIC,
3130                        dynamic=self._FIELDS_DYNAMIC,
3131                        selected=self.op.output_fields)
3132
3133   def ExpandNames(self):
3134     # Lock all nodes, in shared mode
3135     # Temporary removal of locks, should be reverted later
3136     # TODO: reintroduce locks when they are lighter-weight
3137     self.needed_locks = {}
3138     #self.share_locks[locking.LEVEL_NODE] = 1
3139     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3140
3141   @staticmethod
3142   def _DiagnoseByOS(rlist):
3143     """Remaps a per-node return list into an a per-os per-node dictionary
3144
3145     @param rlist: a map with node names as keys and OS objects as values
3146
3147     @rtype: dict
3148     @return: a dictionary with osnames as keys and as value another
3149         map, with nodes as keys and tuples of (path, status, diagnose,
3150         variants, parameters, api_versions) as values, eg::
3151
3152           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3153                                      (/srv/..., False, "invalid api")],
3154                            "node2": [(/srv/..., True, "", [], [])]}
3155           }
3156
3157     """
3158     all_os = {}
3159     # we build here the list of nodes that didn't fail the RPC (at RPC
3160     # level), so that nodes with a non-responding node daemon don't
3161     # make all OSes invalid
3162     good_nodes = [node_name for node_name in rlist
3163                   if not rlist[node_name].fail_msg]
3164     for node_name, nr in rlist.items():
3165       if nr.fail_msg or not nr.payload:
3166         continue
3167       for (name, path, status, diagnose, variants,
3168            params, api_versions) in nr.payload:
3169         if name not in all_os:
3170           # build a list of nodes for this os containing empty lists
3171           # for each node in node_list
3172           all_os[name] = {}
3173           for nname in good_nodes:
3174             all_os[name][nname] = []
3175         # convert params from [name, help] to (name, help)
3176         params = [tuple(v) for v in params]
3177         all_os[name][node_name].append((path, status, diagnose,
3178                                         variants, params, api_versions))
3179     return all_os
3180
3181   def Exec(self, feedback_fn):
3182     """Compute the list of OSes.
3183
3184     """
3185     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3186     node_data = self.rpc.call_os_diagnose(valid_nodes)
3187     pol = self._DiagnoseByOS(node_data)
3188     output = []
3189     cluster = self.cfg.GetClusterInfo()
3190
3191     for os_name in utils.NiceSort(pol.keys()):
3192       os_data = pol[os_name]
3193       row = []
3194       valid = True
3195       (variants, params, api_versions) = null_state = (set(), set(), set())
3196       for idx, osl in enumerate(os_data.values()):
3197         valid = bool(valid and osl and osl[0][1])
3198         if not valid:
3199           (variants, params, api_versions) = null_state
3200           break
3201         node_variants, node_params, node_api = osl[0][3:6]
3202         if idx == 0: # first entry
3203           variants = set(node_variants)
3204           params = set(node_params)
3205           api_versions = set(node_api)
3206         else: # keep consistency
3207           variants.intersection_update(node_variants)
3208           params.intersection_update(node_params)
3209           api_versions.intersection_update(node_api)
3210
3211       is_hid = os_name in cluster.hidden_os
3212       is_blk = os_name in cluster.blacklisted_os
3213       if ((self._HID not in self.op.output_fields and is_hid) or
3214           (self._BLK not in self.op.output_fields and is_blk) or
3215           (self._VLD not in self.op.output_fields and not valid)):
3216         continue
3217
3218       for field in self.op.output_fields:
3219         if field == "name":
3220           val = os_name
3221         elif field == self._VLD:
3222           val = valid
3223         elif field == "node_status":
3224           # this is just a copy of the dict
3225           val = {}
3226           for node_name, nos_list in os_data.items():
3227             val[node_name] = nos_list
3228         elif field == "variants":
3229           val = utils.NiceSort(list(variants))
3230         elif field == "parameters":
3231           val = list(params)
3232         elif field == "api_versions":
3233           val = list(api_versions)
3234         elif field == self._HID:
3235           val = is_hid
3236         elif field == self._BLK:
3237           val = is_blk
3238         else:
3239           raise errors.ParameterError(field)
3240         row.append(val)
3241       output.append(row)
3242
3243     return output
3244
3245
3246 class LURemoveNode(LogicalUnit):
3247   """Logical unit for removing a node.
3248
3249   """
3250   HPATH = "node-remove"
3251   HTYPE = constants.HTYPE_NODE
3252   _OP_PARAMS = [
3253     _PNodeName,
3254     ]
3255
3256   def BuildHooksEnv(self):
3257     """Build hooks env.
3258
3259     This doesn't run on the target node in the pre phase as a failed
3260     node would then be impossible to remove.
3261
3262     """
3263     env = {
3264       "OP_TARGET": self.op.node_name,
3265       "NODE_NAME": self.op.node_name,
3266       }
3267     all_nodes = self.cfg.GetNodeList()
3268     try:
3269       all_nodes.remove(self.op.node_name)
3270     except ValueError:
3271       logging.warning("Node %s which is about to be removed not found"
3272                       " in the all nodes list", self.op.node_name)
3273     return env, all_nodes, all_nodes
3274
3275   def CheckPrereq(self):
3276     """Check prerequisites.
3277
3278     This checks:
3279      - the node exists in the configuration
3280      - it does not have primary or secondary instances
3281      - it's not the master
3282
3283     Any errors are signaled by raising errors.OpPrereqError.
3284
3285     """
3286     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3287     node = self.cfg.GetNodeInfo(self.op.node_name)
3288     assert node is not None
3289
3290     instance_list = self.cfg.GetInstanceList()
3291
3292     masternode = self.cfg.GetMasterNode()
3293     if node.name == masternode:
3294       raise errors.OpPrereqError("Node is the master node,"
3295                                  " you need to failover first.",
3296                                  errors.ECODE_INVAL)
3297
3298     for instance_name in instance_list:
3299       instance = self.cfg.GetInstanceInfo(instance_name)
3300       if node.name in instance.all_nodes:
3301         raise errors.OpPrereqError("Instance %s is still running on the node,"
3302                                    " please remove first." % instance_name,
3303                                    errors.ECODE_INVAL)
3304     self.op.node_name = node.name
3305     self.node = node
3306
3307   def Exec(self, feedback_fn):
3308     """Removes the node from the cluster.
3309
3310     """
3311     node = self.node
3312     logging.info("Stopping the node daemon and removing configs from node %s",
3313                  node.name)
3314
3315     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3316
3317     # Promote nodes to master candidate as needed
3318     _AdjustCandidatePool(self, exceptions=[node.name])
3319     self.context.RemoveNode(node.name)
3320
3321     # Run post hooks on the node before it's removed
3322     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3323     try:
3324       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3325     except:
3326       # pylint: disable-msg=W0702
3327       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3328
3329     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3330     msg = result.fail_msg
3331     if msg:
3332       self.LogWarning("Errors encountered on the remote node while leaving"
3333                       " the cluster: %s", msg)
3334
3335     # Remove node from our /etc/hosts
3336     if self.cfg.GetClusterInfo().modify_etc_hosts:
3337       master_node = self.cfg.GetMasterNode()
3338       result = self.rpc.call_etc_hosts_modify(master_node,
3339                                               constants.ETC_HOSTS_REMOVE,
3340                                               node.name, None)
3341       result.Raise("Can't update hosts file with new host data")
3342       _RedistributeAncillaryFiles(self)
3343
3344
3345 class LUQueryNodes(NoHooksLU):
3346   """Logical unit for querying nodes.
3347
3348   """
3349   # pylint: disable-msg=W0142
3350   _OP_PARAMS = [
3351     _POutputFields,
3352     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3353     ("use_locking", False, _TBool),
3354     ]
3355   REQ_BGL = False
3356
3357   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3358                     "master_candidate", "offline", "drained"]
3359
3360   _FIELDS_DYNAMIC = utils.FieldSet(
3361     "dtotal", "dfree",
3362     "mtotal", "mnode", "mfree",
3363     "bootid",
3364     "ctotal", "cnodes", "csockets",
3365     )
3366
3367   _FIELDS_STATIC = utils.FieldSet(*[
3368     "pinst_cnt", "sinst_cnt",
3369     "pinst_list", "sinst_list",
3370     "pip", "sip", "tags",
3371     "master",
3372     "role"] + _SIMPLE_FIELDS
3373     )
3374
3375   def CheckArguments(self):
3376     _CheckOutputFields(static=self._FIELDS_STATIC,
3377                        dynamic=self._FIELDS_DYNAMIC,
3378                        selected=self.op.output_fields)
3379
3380   def ExpandNames(self):
3381     self.needed_locks = {}
3382     self.share_locks[locking.LEVEL_NODE] = 1
3383
3384     if self.op.names:
3385       self.wanted = _GetWantedNodes(self, self.op.names)
3386     else:
3387       self.wanted = locking.ALL_SET
3388
3389     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3390     self.do_locking = self.do_node_query and self.op.use_locking
3391     if self.do_locking:
3392       # if we don't request only static fields, we need to lock the nodes
3393       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3394
3395   def Exec(self, feedback_fn):
3396     """Computes the list of nodes and their attributes.
3397
3398     """
3399     all_info = self.cfg.GetAllNodesInfo()
3400     if self.do_locking:
3401       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3402     elif self.wanted != locking.ALL_SET:
3403       nodenames = self.wanted
3404       missing = set(nodenames).difference(all_info.keys())
3405       if missing:
3406         raise errors.OpExecError(
3407           "Some nodes were removed before retrieving their data: %s" % missing)
3408     else:
3409       nodenames = all_info.keys()
3410
3411     nodenames = utils.NiceSort(nodenames)
3412     nodelist = [all_info[name] for name in nodenames]
3413
3414     # begin data gathering
3415
3416     if self.do_node_query:
3417       live_data = {}
3418       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3419                                           self.cfg.GetHypervisorType())
3420       for name in nodenames:
3421         nodeinfo = node_data[name]
3422         if not nodeinfo.fail_msg and nodeinfo.payload:
3423           nodeinfo = nodeinfo.payload
3424           fn = utils.TryConvert
3425           live_data[name] = {
3426             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3427             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3428             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3429             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3430             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3431             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3432             "bootid": nodeinfo.get('bootid', None),
3433             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3434             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3435             }
3436         else:
3437           live_data[name] = {}
3438     else:
3439       live_data = dict.fromkeys(nodenames, {})
3440
3441     node_to_primary = dict([(name, set()) for name in nodenames])
3442     node_to_secondary = dict([(name, set()) for name in nodenames])
3443
3444     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3445                              "sinst_cnt", "sinst_list"))
3446     if inst_fields & frozenset(self.op.output_fields):
3447       inst_data = self.cfg.GetAllInstancesInfo()
3448
3449       for inst in inst_data.values():
3450         if inst.primary_node in node_to_primary:
3451           node_to_primary[inst.primary_node].add(inst.name)
3452         for secnode in inst.secondary_nodes:
3453           if secnode in node_to_secondary:
3454             node_to_secondary[secnode].add(inst.name)
3455
3456     master_node = self.cfg.GetMasterNode()
3457
3458     # end data gathering
3459
3460     output = []
3461     for node in nodelist:
3462       node_output = []
3463       for field in self.op.output_fields:
3464         if field in self._SIMPLE_FIELDS:
3465           val = getattr(node, field)
3466         elif field == "pinst_list":
3467           val = list(node_to_primary[node.name])
3468         elif field == "sinst_list":
3469           val = list(node_to_secondary[node.name])
3470         elif field == "pinst_cnt":
3471           val = len(node_to_primary[node.name])
3472         elif field == "sinst_cnt":
3473           val = len(node_to_secondary[node.name])
3474         elif field == "pip":
3475           val = node.primary_ip
3476         elif field == "sip":
3477           val = node.secondary_ip
3478         elif field == "tags":
3479           val = list(node.GetTags())
3480         elif field == "master":
3481           val = node.name == master_node
3482         elif self._FIELDS_DYNAMIC.Matches(field):
3483           val = live_data[node.name].get(field, None)
3484         elif field == "role":
3485           if node.name == master_node:
3486             val = "M"
3487           elif node.master_candidate:
3488             val = "C"
3489           elif node.drained:
3490             val = "D"
3491           elif node.offline:
3492             val = "O"
3493           else:
3494             val = "R"
3495         else:
3496           raise errors.ParameterError(field)
3497         node_output.append(val)
3498       output.append(node_output)
3499
3500     return output
3501
3502
3503 class LUQueryNodeVolumes(NoHooksLU):
3504   """Logical unit for getting volumes on node(s).
3505
3506   """
3507   _OP_PARAMS = [
3508     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3509     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3510     ]
3511   REQ_BGL = False
3512   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3513   _FIELDS_STATIC = utils.FieldSet("node")
3514
3515   def CheckArguments(self):
3516     _CheckOutputFields(static=self._FIELDS_STATIC,
3517                        dynamic=self._FIELDS_DYNAMIC,
3518                        selected=self.op.output_fields)
3519
3520   def ExpandNames(self):
3521     self.needed_locks = {}
3522     self.share_locks[locking.LEVEL_NODE] = 1
3523     if not self.op.nodes:
3524       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3525     else:
3526       self.needed_locks[locking.LEVEL_NODE] = \
3527         _GetWantedNodes(self, self.op.nodes)
3528
3529   def Exec(self, feedback_fn):
3530     """Computes the list of nodes and their attributes.
3531
3532     """
3533     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3534     volumes = self.rpc.call_node_volumes(nodenames)
3535
3536     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3537              in self.cfg.GetInstanceList()]
3538
3539     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3540
3541     output = []
3542     for node in nodenames:
3543       nresult = volumes[node]
3544       if nresult.offline:
3545         continue
3546       msg = nresult.fail_msg
3547       if msg:
3548         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3549         continue
3550
3551       node_vols = nresult.payload[:]
3552       node_vols.sort(key=lambda vol: vol['dev'])
3553
3554       for vol in node_vols:
3555         node_output = []
3556         for field in self.op.output_fields:
3557           if field == "node":
3558             val = node
3559           elif field == "phys":
3560             val = vol['dev']
3561           elif field == "vg":
3562             val = vol['vg']
3563           elif field == "name":
3564             val = vol['name']
3565           elif field == "size":
3566             val = int(float(vol['size']))
3567           elif field == "instance":
3568             for inst in ilist:
3569               if node not in lv_by_node[inst]:
3570                 continue
3571               if vol['name'] in lv_by_node[inst][node]:
3572                 val = inst.name
3573                 break
3574             else:
3575               val = '-'
3576           else:
3577             raise errors.ParameterError(field)
3578           node_output.append(str(val))
3579
3580         output.append(node_output)
3581
3582     return output
3583
3584
3585 class LUQueryNodeStorage(NoHooksLU):
3586   """Logical unit for getting information on storage units on node(s).
3587
3588   """
3589   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3590   _OP_PARAMS = [
3591     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3592     ("storage_type", _NoDefault, _CheckStorageType),
3593     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3594     ("name", None, _TMaybeString),
3595     ]
3596   REQ_BGL = False
3597
3598   def CheckArguments(self):
3599     _CheckOutputFields(static=self._FIELDS_STATIC,
3600                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3601                        selected=self.op.output_fields)
3602
3603   def ExpandNames(self):
3604     self.needed_locks = {}
3605     self.share_locks[locking.LEVEL_NODE] = 1
3606
3607     if self.op.nodes:
3608       self.needed_locks[locking.LEVEL_NODE] = \
3609         _GetWantedNodes(self, self.op.nodes)
3610     else:
3611       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3612
3613   def Exec(self, feedback_fn):
3614     """Computes the list of nodes and their attributes.
3615
3616     """
3617     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3618
3619     # Always get name to sort by
3620     if constants.SF_NAME in self.op.output_fields:
3621       fields = self.op.output_fields[:]
3622     else:
3623       fields = [constants.SF_NAME] + self.op.output_fields
3624
3625     # Never ask for node or type as it's only known to the LU
3626     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3627       while extra in fields:
3628         fields.remove(extra)
3629
3630     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3631     name_idx = field_idx[constants.SF_NAME]
3632
3633     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3634     data = self.rpc.call_storage_list(self.nodes,
3635                                       self.op.storage_type, st_args,
3636                                       self.op.name, fields)
3637
3638     result = []
3639
3640     for node in utils.NiceSort(self.nodes):
3641       nresult = data[node]
3642       if nresult.offline:
3643         continue
3644
3645       msg = nresult.fail_msg
3646       if msg:
3647         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3648         continue
3649
3650       rows = dict([(row[name_idx], row) for row in nresult.payload])
3651
3652       for name in utils.NiceSort(rows.keys()):
3653         row = rows[name]
3654
3655         out = []
3656
3657         for field in self.op.output_fields:
3658           if field == constants.SF_NODE:
3659             val = node
3660           elif field == constants.SF_TYPE:
3661             val = self.op.storage_type
3662           elif field in field_idx:
3663             val = row[field_idx[field]]
3664           else:
3665             raise errors.ParameterError(field)
3666
3667           out.append(val)
3668
3669         result.append(out)
3670
3671     return result
3672
3673
3674 class LUModifyNodeStorage(NoHooksLU):
3675   """Logical unit for modifying a storage volume on a node.
3676
3677   """
3678   _OP_PARAMS = [
3679     _PNodeName,
3680     ("storage_type", _NoDefault, _CheckStorageType),
3681     ("name", _NoDefault, _TNonEmptyString),
3682     ("changes", _NoDefault, _TDict),
3683     ]
3684   REQ_BGL = False
3685
3686   def CheckArguments(self):
3687     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3688
3689     storage_type = self.op.storage_type
3690
3691     try:
3692       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3693     except KeyError:
3694       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3695                                  " modified" % storage_type,
3696                                  errors.ECODE_INVAL)
3697
3698     diff = set(self.op.changes.keys()) - modifiable
3699     if diff:
3700       raise errors.OpPrereqError("The following fields can not be modified for"
3701                                  " storage units of type '%s': %r" %
3702                                  (storage_type, list(diff)),
3703                                  errors.ECODE_INVAL)
3704
3705   def ExpandNames(self):
3706     self.needed_locks = {
3707       locking.LEVEL_NODE: self.op.node_name,
3708       }
3709
3710   def Exec(self, feedback_fn):
3711     """Computes the list of nodes and their attributes.
3712
3713     """
3714     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3715     result = self.rpc.call_storage_modify(self.op.node_name,
3716                                           self.op.storage_type, st_args,
3717                                           self.op.name, self.op.changes)
3718     result.Raise("Failed to modify storage unit '%s' on %s" %
3719                  (self.op.name, self.op.node_name))
3720
3721
3722 class LUAddNode(LogicalUnit):
3723   """Logical unit for adding node to the cluster.
3724
3725   """
3726   HPATH = "node-add"
3727   HTYPE = constants.HTYPE_NODE
3728   _OP_PARAMS = [
3729     _PNodeName,
3730     ("primary_ip", None, _NoType),
3731     ("secondary_ip", None, _TMaybeString),
3732     ("readd", False, _TBool),
3733     ("nodegroup", None, _TMaybeString)
3734     ]
3735
3736   def CheckArguments(self):
3737     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3738     # validate/normalize the node name
3739     self.hostname = netutils.GetHostname(name=self.op.node_name,
3740                                          family=self.primary_ip_family)
3741     self.op.node_name = self.hostname.name
3742     if self.op.readd and self.op.nodegroup:
3743       raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
3744                                  " being readded", errors.ECODE_INVAL)
3745
3746   def BuildHooksEnv(self):
3747     """Build hooks env.
3748
3749     This will run on all nodes before, and on all nodes + the new node after.
3750
3751     """
3752     env = {
3753       "OP_TARGET": self.op.node_name,
3754       "NODE_NAME": self.op.node_name,
3755       "NODE_PIP": self.op.primary_ip,
3756       "NODE_SIP": self.op.secondary_ip,
3757       }
3758     nodes_0 = self.cfg.GetNodeList()
3759     nodes_1 = nodes_0 + [self.op.node_name, ]
3760     return env, nodes_0, nodes_1
3761
3762   def CheckPrereq(self):
3763     """Check prerequisites.
3764
3765     This checks:
3766      - the new node is not already in the config
3767      - it is resolvable
3768      - its parameters (single/dual homed) matches the cluster
3769
3770     Any errors are signaled by raising errors.OpPrereqError.
3771
3772     """
3773     cfg = self.cfg
3774     hostname = self.hostname
3775     node = hostname.name
3776     primary_ip = self.op.primary_ip = hostname.ip
3777     if self.op.secondary_ip is None:
3778       if self.primary_ip_family == netutils.IP6Address.family:
3779         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3780                                    " IPv4 address must be given as secondary",
3781                                    errors.ECODE_INVAL)
3782       self.op.secondary_ip = primary_ip
3783
3784     secondary_ip = self.op.secondary_ip
3785     if not netutils.IP4Address.IsValid(secondary_ip):
3786       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3787                                  " address" % secondary_ip, errors.ECODE_INVAL)
3788
3789     node_list = cfg.GetNodeList()
3790     if not self.op.readd and node in node_list:
3791       raise errors.OpPrereqError("Node %s is already in the configuration" %
3792                                  node, errors.ECODE_EXISTS)
3793     elif self.op.readd and node not in node_list:
3794       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3795                                  errors.ECODE_NOENT)
3796
3797     self.changed_primary_ip = False
3798
3799     for existing_node_name in node_list:
3800       existing_node = cfg.GetNodeInfo(existing_node_name)
3801
3802       if self.op.readd and node == existing_node_name:
3803         if existing_node.secondary_ip != secondary_ip:
3804           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3805                                      " address configuration as before",
3806                                      errors.ECODE_INVAL)
3807         if existing_node.primary_ip != primary_ip:
3808           self.changed_primary_ip = True
3809
3810         continue
3811
3812       if (existing_node.primary_ip == primary_ip or
3813           existing_node.secondary_ip == primary_ip or
3814           existing_node.primary_ip == secondary_ip or
3815           existing_node.secondary_ip == secondary_ip):
3816         raise errors.OpPrereqError("New node ip address(es) conflict with"
3817                                    " existing node %s" % existing_node.name,
3818                                    errors.ECODE_NOTUNIQUE)
3819
3820     # check that the type of the node (single versus dual homed) is the
3821     # same as for the master
3822     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3823     master_singlehomed = myself.secondary_ip == myself.primary_ip
3824     newbie_singlehomed = secondary_ip == primary_ip
3825     if master_singlehomed != newbie_singlehomed:
3826       if master_singlehomed:
3827         raise errors.OpPrereqError("The master has no private ip but the"
3828                                    " new node has one",
3829                                    errors.ECODE_INVAL)
3830       else:
3831         raise errors.OpPrereqError("The master has a private ip but the"
3832                                    " new node doesn't have one",
3833                                    errors.ECODE_INVAL)
3834
3835     # checks reachability
3836     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3837       raise errors.OpPrereqError("Node not reachable by ping",
3838                                  errors.ECODE_ENVIRON)
3839
3840     if not newbie_singlehomed:
3841       # check reachability from my secondary ip to newbie's secondary ip
3842       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3843                            source=myself.secondary_ip):
3844         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3845                                    " based ping to noded port",
3846                                    errors.ECODE_ENVIRON)
3847
3848     if self.op.readd:
3849       exceptions = [node]
3850     else:
3851       exceptions = []
3852
3853     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3854
3855     if self.op.readd:
3856       self.new_node = self.cfg.GetNodeInfo(node)
3857       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3858     else:
3859       nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
3860       self.new_node = objects.Node(name=node,
3861                                    primary_ip=primary_ip,
3862                                    secondary_ip=secondary_ip,
3863                                    master_candidate=self.master_candidate,
3864                                    offline=False, drained=False,
3865                                    nodegroup=nodegroup)
3866
3867   def Exec(self, feedback_fn):
3868     """Adds the new node to the cluster.
3869
3870     """
3871     new_node = self.new_node
3872     node = new_node.name
3873
3874     # for re-adds, reset the offline/drained/master-candidate flags;
3875     # we need to reset here, otherwise offline would prevent RPC calls
3876     # later in the procedure; this also means that if the re-add
3877     # fails, we are left with a non-offlined, broken node
3878     if self.op.readd:
3879       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3880       self.LogInfo("Readding a node, the offline/drained flags were reset")
3881       # if we demote the node, we do cleanup later in the procedure
3882       new_node.master_candidate = self.master_candidate
3883       if self.changed_primary_ip:
3884         new_node.primary_ip = self.op.primary_ip
3885
3886     # notify the user about any possible mc promotion
3887     if new_node.master_candidate:
3888       self.LogInfo("Node will be a master candidate")
3889
3890     # check connectivity
3891     result = self.rpc.call_version([node])[node]
3892     result.Raise("Can't get version information from node %s" % node)
3893     if constants.PROTOCOL_VERSION == result.payload:
3894       logging.info("Communication to node %s fine, sw version %s match",
3895                    node, result.payload)
3896     else:
3897       raise errors.OpExecError("Version mismatch master version %s,"
3898                                " node version %s" %
3899                                (constants.PROTOCOL_VERSION, result.payload))
3900
3901     # Add node to our /etc/hosts, and add key to known_hosts
3902     if self.cfg.GetClusterInfo().modify_etc_hosts:
3903       master_node = self.cfg.GetMasterNode()
3904       result = self.rpc.call_etc_hosts_modify(master_node,
3905                                               constants.ETC_HOSTS_ADD,
3906                                               self.hostname.name,
3907                                               self.hostname.ip)
3908       result.Raise("Can't update hosts file with new host data")
3909
3910     if new_node.secondary_ip != new_node.primary_ip:
3911       result = self.rpc.call_node_has_ip_address(new_node.name,
3912                                                  new_node.secondary_ip)
3913       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3914                    prereq=True, ecode=errors.ECODE_ENVIRON)
3915       if not result.payload:
3916         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3917                                  " you gave (%s). Please fix and re-run this"
3918                                  " command." % new_node.secondary_ip)
3919
3920     node_verify_list = [self.cfg.GetMasterNode()]
3921     node_verify_param = {
3922       constants.NV_NODELIST: [node],
3923       # TODO: do a node-net-test as well?
3924     }
3925
3926     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3927                                        self.cfg.GetClusterName())
3928     for verifier in node_verify_list:
3929       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3930       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3931       if nl_payload:
3932         for failed in nl_payload:
3933           feedback_fn("ssh/hostname verification failed"
3934                       " (checking from %s): %s" %
3935                       (verifier, nl_payload[failed]))
3936         raise errors.OpExecError("ssh/hostname verification failed.")
3937
3938     if self.op.readd:
3939       _RedistributeAncillaryFiles(self)
3940       self.context.ReaddNode(new_node)
3941       # make sure we redistribute the config
3942       self.cfg.Update(new_node, feedback_fn)
3943       # and make sure the new node will not have old files around
3944       if not new_node.master_candidate:
3945         result = self.rpc.call_node_demote_from_mc(new_node.name)
3946         msg = result.fail_msg
3947         if msg:
3948           self.LogWarning("Node failed to demote itself from master"
3949                           " candidate status: %s" % msg)
3950     else:
3951       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3952       self.context.AddNode(new_node, self.proc.GetECId())
3953
3954
3955 class LUSetNodeParams(LogicalUnit):
3956   """Modifies the parameters of a node.
3957
3958   """
3959   HPATH = "node-modify"
3960   HTYPE = constants.HTYPE_NODE
3961   _OP_PARAMS = [
3962     _PNodeName,
3963     ("master_candidate", None, _TMaybeBool),
3964     ("offline", None, _TMaybeBool),
3965     ("drained", None, _TMaybeBool),
3966     ("auto_promote", False, _TBool),
3967     _PForce,
3968     ]
3969   REQ_BGL = False
3970
3971   def CheckArguments(self):
3972     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3973     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3974     if all_mods.count(None) == 3:
3975       raise errors.OpPrereqError("Please pass at least one modification",
3976                                  errors.ECODE_INVAL)
3977     if all_mods.count(True) > 1:
3978       raise errors.OpPrereqError("Can't set the node into more than one"
3979                                  " state at the same time",
3980                                  errors.ECODE_INVAL)
3981
3982     # Boolean value that tells us whether we're offlining or draining the node
3983     self.offline_or_drain = (self.op.offline == True or
3984                              self.op.drained == True)
3985     self.deoffline_or_drain = (self.op.offline == False or
3986                                self.op.drained == False)
3987     self.might_demote = (self.op.master_candidate == False or
3988                          self.offline_or_drain)
3989
3990     self.lock_all = self.op.auto_promote and self.might_demote
3991
3992
3993   def ExpandNames(self):
3994     if self.lock_all:
3995       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3996     else:
3997       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3998
3999   def BuildHooksEnv(self):
4000     """Build hooks env.
4001
4002     This runs on the master node.
4003
4004     """
4005     env = {
4006       "OP_TARGET": self.op.node_name,
4007       "MASTER_CANDIDATE": str(self.op.master_candidate),
4008       "OFFLINE": str(self.op.offline),
4009       "DRAINED": str(self.op.drained),
4010       }
4011     nl = [self.cfg.GetMasterNode(),
4012           self.op.node_name]
4013     return env, nl, nl
4014
4015   def CheckPrereq(self):
4016     """Check prerequisites.
4017
4018     This only checks the instance list against the existing names.
4019
4020     """
4021     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4022
4023     if (self.op.master_candidate is not None or
4024         self.op.drained is not None or
4025         self.op.offline is not None):
4026       # we can't change the master's node flags
4027       if self.op.node_name == self.cfg.GetMasterNode():
4028         raise errors.OpPrereqError("The master role can be changed"
4029                                    " only via master-failover",
4030                                    errors.ECODE_INVAL)
4031
4032
4033     if node.master_candidate and self.might_demote and not self.lock_all:
4034       assert not self.op.auto_promote, "auto-promote set but lock_all not"
4035       # check if after removing the current node, we're missing master
4036       # candidates
4037       (mc_remaining, mc_should, _) = \
4038           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4039       if mc_remaining < mc_should:
4040         raise errors.OpPrereqError("Not enough master candidates, please"
4041                                    " pass auto_promote to allow promotion",
4042                                    errors.ECODE_INVAL)
4043
4044     if (self.op.master_candidate == True and
4045         ((node.offline and not self.op.offline == False) or
4046          (node.drained and not self.op.drained == False))):
4047       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4048                                  " to master_candidate" % node.name,
4049                                  errors.ECODE_INVAL)
4050
4051     # If we're being deofflined/drained, we'll MC ourself if needed
4052     if (self.deoffline_or_drain and not self.offline_or_drain and not
4053         self.op.master_candidate == True and not node.master_candidate):
4054       self.op.master_candidate = _DecideSelfPromotion(self)
4055       if self.op.master_candidate:
4056         self.LogInfo("Autopromoting node to master candidate")
4057
4058     return
4059
4060   def Exec(self, feedback_fn):
4061     """Modifies a node.
4062
4063     """
4064     node = self.node
4065
4066     result = []
4067     changed_mc = False
4068
4069     if self.op.offline is not None:
4070       node.offline = self.op.offline
4071       result.append(("offline", str(self.op.offline)))
4072       if self.op.offline == True:
4073         if node.master_candidate:
4074           node.master_candidate = False
4075           changed_mc = True
4076           result.append(("master_candidate", "auto-demotion due to offline"))
4077         if node.drained:
4078           node.drained = False
4079           result.append(("drained", "clear drained status due to offline"))
4080
4081     if self.op.master_candidate is not None:
4082       node.master_candidate = self.op.master_candidate
4083       changed_mc = True
4084       result.append(("master_candidate", str(self.op.master_candidate)))
4085       if self.op.master_candidate == False:
4086         rrc = self.rpc.call_node_demote_from_mc(node.name)
4087         msg = rrc.fail_msg
4088         if msg:
4089           self.LogWarning("Node failed to demote itself: %s" % msg)
4090
4091     if self.op.drained is not None:
4092       node.drained = self.op.drained
4093       result.append(("drained", str(self.op.drained)))
4094       if self.op.drained == True:
4095         if node.master_candidate:
4096           node.master_candidate = False
4097           changed_mc = True
4098           result.append(("master_candidate", "auto-demotion due to drain"))
4099           rrc = self.rpc.call_node_demote_from_mc(node.name)
4100           msg = rrc.fail_msg
4101           if msg:
4102             self.LogWarning("Node failed to demote itself: %s" % msg)
4103         if node.offline:
4104           node.offline = False
4105           result.append(("offline", "clear offline status due to drain"))
4106
4107     # we locked all nodes, we adjust the CP before updating this node
4108     if self.lock_all:
4109       _AdjustCandidatePool(self, [node.name])
4110
4111     # this will trigger configuration file update, if needed
4112     self.cfg.Update(node, feedback_fn)
4113
4114     # this will trigger job queue propagation or cleanup
4115     if changed_mc:
4116       self.context.ReaddNode(node)
4117
4118     return result
4119
4120
4121 class LUPowercycleNode(NoHooksLU):
4122   """Powercycles a node.
4123
4124   """
4125   _OP_PARAMS = [
4126     _PNodeName,
4127     _PForce,
4128     ]
4129   REQ_BGL = False
4130
4131   def CheckArguments(self):
4132     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4133     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4134       raise errors.OpPrereqError("The node is the master and the force"
4135                                  " parameter was not set",
4136                                  errors.ECODE_INVAL)
4137
4138   def ExpandNames(self):
4139     """Locking for PowercycleNode.
4140
4141     This is a last-resort option and shouldn't block on other
4142     jobs. Therefore, we grab no locks.
4143
4144     """
4145     self.needed_locks = {}
4146
4147   def Exec(self, feedback_fn):
4148     """Reboots a node.
4149
4150     """
4151     result = self.rpc.call_node_powercycle(self.op.node_name,
4152                                            self.cfg.GetHypervisorType())
4153     result.Raise("Failed to schedule the reboot")
4154     return result.payload
4155
4156
4157 class LUQueryClusterInfo(NoHooksLU):
4158   """Query cluster configuration.
4159
4160   """
4161   REQ_BGL = False
4162
4163   def ExpandNames(self):
4164     self.needed_locks = {}
4165
4166   def Exec(self, feedback_fn):
4167     """Return cluster config.
4168
4169     """
4170     cluster = self.cfg.GetClusterInfo()
4171     os_hvp = {}
4172
4173     # Filter just for enabled hypervisors
4174     for os_name, hv_dict in cluster.os_hvp.items():
4175       os_hvp[os_name] = {}
4176       for hv_name, hv_params in hv_dict.items():
4177         if hv_name in cluster.enabled_hypervisors:
4178           os_hvp[os_name][hv_name] = hv_params
4179
4180     # Convert ip_family to ip_version
4181     primary_ip_version = constants.IP4_VERSION
4182     if cluster.primary_ip_family == netutils.IP6Address.family:
4183       primary_ip_version = constants.IP6_VERSION
4184
4185     result = {
4186       "software_version": constants.RELEASE_VERSION,
4187       "protocol_version": constants.PROTOCOL_VERSION,
4188       "config_version": constants.CONFIG_VERSION,
4189       "os_api_version": max(constants.OS_API_VERSIONS),
4190       "export_version": constants.EXPORT_VERSION,
4191       "architecture": (platform.architecture()[0], platform.machine()),
4192       "name": cluster.cluster_name,
4193       "master": cluster.master_node,
4194       "default_hypervisor": cluster.enabled_hypervisors[0],
4195       "enabled_hypervisors": cluster.enabled_hypervisors,
4196       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4197                         for hypervisor_name in cluster.enabled_hypervisors]),
4198       "os_hvp": os_hvp,
4199       "beparams": cluster.beparams,
4200       "osparams": cluster.osparams,
4201       "nicparams": cluster.nicparams,
4202       "candidate_pool_size": cluster.candidate_pool_size,
4203       "master_netdev": cluster.master_netdev,
4204       "volume_group_name": cluster.volume_group_name,
4205       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4206       "file_storage_dir": cluster.file_storage_dir,
4207       "maintain_node_health": cluster.maintain_node_health,
4208       "ctime": cluster.ctime,
4209       "mtime": cluster.mtime,
4210       "uuid": cluster.uuid,
4211       "tags": list(cluster.GetTags()),
4212       "uid_pool": cluster.uid_pool,
4213       "default_iallocator": cluster.default_iallocator,
4214       "reserved_lvs": cluster.reserved_lvs,
4215       "primary_ip_version": primary_ip_version,
4216       }
4217
4218     return result
4219
4220
4221 class LUQueryConfigValues(NoHooksLU):
4222   """Return configuration values.
4223
4224   """
4225   _OP_PARAMS = [_POutputFields]
4226   REQ_BGL = False
4227   _FIELDS_DYNAMIC = utils.FieldSet()
4228   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229                                   "watcher_pause")
4230
4231   def CheckArguments(self):
4232     _CheckOutputFields(static=self._FIELDS_STATIC,
4233                        dynamic=self._FIELDS_DYNAMIC,
4234                        selected=self.op.output_fields)
4235
4236   def ExpandNames(self):
4237     self.needed_locks = {}
4238
4239   def Exec(self, feedback_fn):
4240     """Dump a representation of the cluster config to the standard output.
4241
4242     """
4243     values = []
4244     for field in self.op.output_fields:
4245       if field == "cluster_name":
4246         entry = self.cfg.GetClusterName()
4247       elif field == "master_node":
4248         entry = self.cfg.GetMasterNode()
4249       elif field == "drain_flag":
4250         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4251       elif field == "watcher_pause":
4252         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4253       else:
4254         raise errors.ParameterError(field)
4255       values.append(entry)
4256     return values
4257
4258
4259 class LUActivateInstanceDisks(NoHooksLU):
4260   """Bring up an instance's disks.
4261
4262   """
4263   _OP_PARAMS = [
4264     _PInstanceName,
4265     ("ignore_size", False, _TBool),
4266     ]
4267   REQ_BGL = False
4268
4269   def ExpandNames(self):
4270     self._ExpandAndLockInstance()
4271     self.needed_locks[locking.LEVEL_NODE] = []
4272     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4273
4274   def DeclareLocks(self, level):
4275     if level == locking.LEVEL_NODE:
4276       self._LockInstancesNodes()
4277
4278   def CheckPrereq(self):
4279     """Check prerequisites.
4280
4281     This checks that the instance is in the cluster.
4282
4283     """
4284     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285     assert self.instance is not None, \
4286       "Cannot retrieve locked instance %s" % self.op.instance_name
4287     _CheckNodeOnline(self, self.instance.primary_node)
4288
4289   def Exec(self, feedback_fn):
4290     """Activate the disks.
4291
4292     """
4293     disks_ok, disks_info = \
4294               _AssembleInstanceDisks(self, self.instance,
4295                                      ignore_size=self.op.ignore_size)
4296     if not disks_ok:
4297       raise errors.OpExecError("Cannot activate block devices")
4298
4299     return disks_info
4300
4301
4302 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4303                            ignore_size=False):
4304   """Prepare the block devices for an instance.
4305
4306   This sets up the block devices on all nodes.
4307
4308   @type lu: L{LogicalUnit}
4309   @param lu: the logical unit on whose behalf we execute
4310   @type instance: L{objects.Instance}
4311   @param instance: the instance for whose disks we assemble
4312   @type disks: list of L{objects.Disk} or None
4313   @param disks: which disks to assemble (or all, if None)
4314   @type ignore_secondaries: boolean
4315   @param ignore_secondaries: if true, errors on secondary nodes
4316       won't result in an error return from the function
4317   @type ignore_size: boolean
4318   @param ignore_size: if true, the current known size of the disk
4319       will not be used during the disk activation, useful for cases
4320       when the size is wrong
4321   @return: False if the operation failed, otherwise a list of
4322       (host, instance_visible_name, node_visible_name)
4323       with the mapping from node devices to instance devices
4324
4325   """
4326   device_info = []
4327   disks_ok = True
4328   iname = instance.name
4329   disks = _ExpandCheckDisks(instance, disks)
4330
4331   # With the two passes mechanism we try to reduce the window of
4332   # opportunity for the race condition of switching DRBD to primary
4333   # before handshaking occured, but we do not eliminate it
4334
4335   # The proper fix would be to wait (with some limits) until the
4336   # connection has been made and drbd transitions from WFConnection
4337   # into any other network-connected state (Connected, SyncTarget,
4338   # SyncSource, etc.)
4339
4340   # 1st pass, assemble on all nodes in secondary mode
4341   for inst_disk in disks:
4342     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4343       if ignore_size:
4344         node_disk = node_disk.Copy()
4345         node_disk.UnsetSize()
4346       lu.cfg.SetDiskID(node_disk, node)
4347       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348       msg = result.fail_msg
4349       if msg:
4350         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351                            " (is_primary=False, pass=1): %s",
4352                            inst_disk.iv_name, node, msg)
4353         if not ignore_secondaries:
4354           disks_ok = False
4355
4356   # FIXME: race condition on drbd migration to primary
4357
4358   # 2nd pass, do only the primary node
4359   for inst_disk in disks:
4360     dev_path = None
4361
4362     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363       if node != instance.primary_node:
4364         continue
4365       if ignore_size:
4366         node_disk = node_disk.Copy()
4367         node_disk.UnsetSize()
4368       lu.cfg.SetDiskID(node_disk, node)
4369       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370       msg = result.fail_msg
4371       if msg:
4372         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373                            " (is_primary=True, pass=2): %s",
4374                            inst_disk.iv_name, node, msg)
4375         disks_ok = False
4376       else:
4377         dev_path = result.payload
4378
4379     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4380
4381   # leave the disks configured for the primary node
4382   # this is a workaround that would be fixed better by
4383   # improving the logical/physical id handling
4384   for disk in disks:
4385     lu.cfg.SetDiskID(disk, instance.primary_node)
4386
4387   return disks_ok, device_info
4388
4389
4390 def _StartInstanceDisks(lu, instance, force):
4391   """Start the disks of an instance.
4392
4393   """
4394   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395                                            ignore_secondaries=force)
4396   if not disks_ok:
4397     _ShutdownInstanceDisks(lu, instance)
4398     if force is not None and not force:
4399       lu.proc.LogWarning("", hint="If the message above refers to a"
4400                          " secondary node,"
4401                          " you can retry the operation using '--force'.")
4402     raise errors.OpExecError("Disk consistency error")
4403
4404
4405 class LUDeactivateInstanceDisks(NoHooksLU):
4406   """Shutdown an instance's disks.
4407
4408   """
4409   _OP_PARAMS = [
4410     _PInstanceName,
4411     ]
4412   REQ_BGL = False
4413
4414   def ExpandNames(self):
4415     self._ExpandAndLockInstance()
4416     self.needed_locks[locking.LEVEL_NODE] = []
4417     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4418
4419   def DeclareLocks(self, level):
4420     if level == locking.LEVEL_NODE:
4421       self._LockInstancesNodes()
4422
4423   def CheckPrereq(self):
4424     """Check prerequisites.
4425
4426     This checks that the instance is in the cluster.
4427
4428     """
4429     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430     assert self.instance is not None, \
4431       "Cannot retrieve locked instance %s" % self.op.instance_name
4432
4433   def Exec(self, feedback_fn):
4434     """Deactivate the disks
4435
4436     """
4437     instance = self.instance
4438     _SafeShutdownInstanceDisks(self, instance)
4439
4440
4441 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442   """Shutdown block devices of an instance.
4443
4444   This function checks if an instance is running, before calling
4445   _ShutdownInstanceDisks.
4446
4447   """
4448   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449   _ShutdownInstanceDisks(lu, instance, disks=disks)
4450
4451
4452 def _ExpandCheckDisks(instance, disks):
4453   """Return the instance disks selected by the disks list
4454
4455   @type disks: list of L{objects.Disk} or None
4456   @param disks: selected disks
4457   @rtype: list of L{objects.Disk}
4458   @return: selected instance disks to act on
4459
4460   """
4461   if disks is None:
4462     return instance.disks
4463   else:
4464     if not set(disks).issubset(instance.disks):
4465       raise errors.ProgrammerError("Can only act on disks belonging to the"
4466                                    " target instance")
4467     return disks
4468
4469
4470 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471   """Shutdown block devices of an instance.
4472
4473   This does the shutdown on all nodes of the instance.
4474
4475   If the ignore_primary is false, errors on the primary node are
4476   ignored.
4477
4478   """
4479   all_result = True
4480   disks = _ExpandCheckDisks(instance, disks)
4481
4482   for disk in disks:
4483     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484       lu.cfg.SetDiskID(top_disk, node)
4485       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486       msg = result.fail_msg
4487       if msg:
4488         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489                       disk.iv_name, node, msg)
4490         if not ignore_primary or node != instance.primary_node:
4491           all_result = False
4492   return all_result
4493
4494
4495 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496   """Checks if a node has enough free memory.
4497
4498   This function check if a given node has the needed amount of free
4499   memory. In case the node has less memory or we cannot get the
4500   information from the node, this function raise an OpPrereqError
4501   exception.
4502
4503   @type lu: C{LogicalUnit}
4504   @param lu: a logical unit from which we get configuration data
4505   @type node: C{str}
4506   @param node: the node to check
4507   @type reason: C{str}
4508   @param reason: string to use in the error message
4509   @type requested: C{int}
4510   @param requested: the amount of memory in MiB to check for
4511   @type hypervisor_name: C{str}
4512   @param hypervisor_name: the hypervisor to ask for memory stats
4513   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514       we cannot check the node
4515
4516   """
4517   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518   nodeinfo[node].Raise("Can't get data from node %s" % node,
4519                        prereq=True, ecode=errors.ECODE_ENVIRON)
4520   free_mem = nodeinfo[node].payload.get('memory_free', None)
4521   if not isinstance(free_mem, int):
4522     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523                                " was '%s'" % (node, free_mem),
4524                                errors.ECODE_ENVIRON)
4525   if requested > free_mem:
4526     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527                                " needed %s MiB, available %s MiB" %
4528                                (node, reason, requested, free_mem),
4529                                errors.ECODE_NORES)
4530
4531
4532 def _CheckNodesFreeDisk(lu, nodenames, requested):
4533   """Checks if nodes have enough free disk space in the default VG.
4534
4535   This function check if all given nodes have the needed amount of
4536   free disk. In case any node has less disk or we cannot get the
4537   information from the node, this function raise an OpPrereqError
4538   exception.
4539
4540   @type lu: C{LogicalUnit}
4541   @param lu: a logical unit from which we get configuration data
4542   @type nodenames: C{list}
4543   @param nodenames: the list of node names to check
4544   @type requested: C{int}
4545   @param requested: the amount of disk in MiB to check for
4546   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547       we cannot check the node
4548
4549   """
4550   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551                                    lu.cfg.GetHypervisorType())
4552   for node in nodenames:
4553     info = nodeinfo[node]
4554     info.Raise("Cannot get current information from node %s" % node,
4555                prereq=True, ecode=errors.ECODE_ENVIRON)
4556     vg_free = info.payload.get("vg_free", None)
4557     if not isinstance(vg_free, int):
4558       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559                                  " result was '%s'" % (node, vg_free),
4560                                  errors.ECODE_ENVIRON)
4561     if requested > vg_free:
4562       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563                                  " required %d MiB, available %d MiB" %
4564                                  (node, requested, vg_free),
4565                                  errors.ECODE_NORES)
4566
4567
4568 class LUStartupInstance(LogicalUnit):
4569   """Starts an instance.
4570
4571   """
4572   HPATH = "instance-start"
4573   HTYPE = constants.HTYPE_INSTANCE
4574   _OP_PARAMS = [
4575     _PInstanceName,
4576     _PForce,
4577     ("hvparams", _EmptyDict, _TDict),
4578     ("beparams", _EmptyDict, _TDict),
4579     ]
4580   REQ_BGL = False
4581
4582   def CheckArguments(self):
4583     # extra beparams
4584     if self.op.beparams:
4585       # fill the beparams dict
4586       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4587
4588   def ExpandNames(self):
4589     self._ExpandAndLockInstance()
4590
4591   def BuildHooksEnv(self):
4592     """Build hooks env.
4593
4594     This runs on master, primary and secondary nodes of the instance.
4595
4596     """
4597     env = {
4598       "FORCE": self.op.force,
4599       }
4600     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4601     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602     return env, nl, nl
4603
4604   def CheckPrereq(self):
4605     """Check prerequisites.
4606
4607     This checks that the instance is in the cluster.
4608
4609     """
4610     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611     assert self.instance is not None, \
4612       "Cannot retrieve locked instance %s" % self.op.instance_name
4613
4614     # extra hvparams
4615     if self.op.hvparams:
4616       # check hypervisor parameter syntax (locally)
4617       cluster = self.cfg.GetClusterInfo()
4618       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619       filled_hvp = cluster.FillHV(instance)
4620       filled_hvp.update(self.op.hvparams)
4621       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622       hv_type.CheckParameterSyntax(filled_hvp)
4623       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4624
4625     _CheckNodeOnline(self, instance.primary_node)
4626
4627     bep = self.cfg.GetClusterInfo().FillBE(instance)
4628     # check bridges existence
4629     _CheckInstanceBridgesExist(self, instance)
4630
4631     remote_info = self.rpc.call_instance_info(instance.primary_node,
4632                                               instance.name,
4633                                               instance.hypervisor)
4634     remote_info.Raise("Error checking node %s" % instance.primary_node,
4635                       prereq=True, ecode=errors.ECODE_ENVIRON)
4636     if not remote_info.payload: # not running already
4637       _CheckNodeFreeMemory(self, instance.primary_node,
4638                            "starting instance %s" % instance.name,
4639                            bep[constants.BE_MEMORY], instance.hypervisor)
4640
4641   def Exec(self, feedback_fn):
4642     """Start the instance.
4643
4644     """
4645     instance = self.instance
4646     force = self.op.force
4647
4648     self.cfg.MarkInstanceUp(instance.name)
4649
4650     node_current = instance.primary_node
4651
4652     _StartInstanceDisks(self, instance, force)
4653
4654     result = self.rpc.call_instance_start(node_current, instance,
4655                                           self.op.hvparams, self.op.beparams)
4656     msg = result.fail_msg
4657     if msg:
4658       _ShutdownInstanceDisks(self, instance)
4659       raise errors.OpExecError("Could not start instance: %s" % msg)
4660
4661
4662 class LURebootInstance(LogicalUnit):
4663   """Reboot an instance.
4664
4665   """
4666   HPATH = "instance-reboot"
4667   HTYPE = constants.HTYPE_INSTANCE
4668   _OP_PARAMS = [
4669     _PInstanceName,
4670     ("ignore_secondaries", False, _TBool),
4671     ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4672     _PShutdownTimeout,
4673     ]
4674   REQ_BGL = False
4675
4676   def ExpandNames(self):
4677     self._ExpandAndLockInstance()
4678
4679   def BuildHooksEnv(self):
4680     """Build hooks env.
4681
4682     This runs on master, primary and secondary nodes of the instance.
4683
4684     """
4685     env = {
4686       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687       "REBOOT_TYPE": self.op.reboot_type,
4688       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4689       }
4690     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4692     return env, nl, nl
4693
4694   def CheckPrereq(self):
4695     """Check prerequisites.
4696
4697     This checks that the instance is in the cluster.
4698
4699     """
4700     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4701     assert self.instance is not None, \
4702       "Cannot retrieve locked instance %s" % self.op.instance_name
4703
4704     _CheckNodeOnline(self, instance.primary_node)
4705
4706     # check bridges existence
4707     _CheckInstanceBridgesExist(self, instance)
4708
4709   def Exec(self, feedback_fn):
4710     """Reboot the instance.
4711
4712     """
4713     instance = self.instance
4714     ignore_secondaries = self.op.ignore_secondaries
4715     reboot_type = self.op.reboot_type
4716
4717     node_current = instance.primary_node
4718
4719     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720                        constants.INSTANCE_REBOOT_HARD]:
4721       for disk in instance.disks:
4722         self.cfg.SetDiskID(disk, node_current)
4723       result = self.rpc.call_instance_reboot(node_current, instance,
4724                                              reboot_type,
4725                                              self.op.shutdown_timeout)
4726       result.Raise("Could not reboot instance")
4727     else:
4728       result = self.rpc.call_instance_shutdown(node_current, instance,
4729                                                self.op.shutdown_timeout)
4730       result.Raise("Could not shutdown instance for full reboot")
4731       _ShutdownInstanceDisks(self, instance)
4732       _StartInstanceDisks(self, instance, ignore_secondaries)
4733       result = self.rpc.call_instance_start(node_current, instance, None, None)
4734       msg = result.fail_msg
4735       if msg:
4736         _ShutdownInstanceDisks(self, instance)
4737         raise errors.OpExecError("Could not start instance for"
4738                                  " full reboot: %s" % msg)
4739
4740     self.cfg.MarkInstanceUp(instance.name)
4741
4742
4743 class LUShutdownInstance(LogicalUnit):
4744   """Shutdown an instance.
4745
4746   """
4747   HPATH = "instance-stop"
4748   HTYPE = constants.HTYPE_INSTANCE
4749   _OP_PARAMS = [
4750     _PInstanceName,
4751     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4752     ]
4753   REQ_BGL = False
4754
4755   def ExpandNames(self):
4756     self._ExpandAndLockInstance()
4757
4758   def BuildHooksEnv(self):
4759     """Build hooks env.
4760
4761     This runs on master, primary and secondary nodes of the instance.
4762
4763     """
4764     env = _BuildInstanceHookEnvByObject(self, self.instance)
4765     env["TIMEOUT"] = self.op.timeout
4766     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4767     return env, nl, nl
4768
4769   def CheckPrereq(self):
4770     """Check prerequisites.
4771
4772     This checks that the instance is in the cluster.
4773
4774     """
4775     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776     assert self.instance is not None, \
4777       "Cannot retrieve locked instance %s" % self.op.instance_name
4778     _CheckNodeOnline(self, self.instance.primary_node)
4779
4780   def Exec(self, feedback_fn):
4781     """Shutdown the instance.
4782
4783     """
4784     instance = self.instance
4785     node_current = instance.primary_node
4786     timeout = self.op.timeout
4787     self.cfg.MarkInstanceDown(instance.name)
4788     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4789     msg = result.fail_msg
4790     if msg:
4791       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4792
4793     _ShutdownInstanceDisks(self, instance)
4794
4795
4796 class LUReinstallInstance(LogicalUnit):
4797   """Reinstall an instance.
4798
4799   """
4800   HPATH = "instance-reinstall"
4801   HTYPE = constants.HTYPE_INSTANCE
4802   _OP_PARAMS = [
4803     _PInstanceName,
4804     ("os_type", None, _TMaybeString),
4805     ("force_variant", False, _TBool),
4806     ]
4807   REQ_BGL = False
4808
4809   def ExpandNames(self):
4810     self._ExpandAndLockInstance()
4811
4812   def BuildHooksEnv(self):
4813     """Build hooks env.
4814
4815     This runs on master, primary and secondary nodes of the instance.
4816
4817     """
4818     env = _BuildInstanceHookEnvByObject(self, self.instance)
4819     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4820     return env, nl, nl
4821
4822   def CheckPrereq(self):
4823     """Check prerequisites.
4824
4825     This checks that the instance is in the cluster and is not running.
4826
4827     """
4828     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4829     assert instance is not None, \
4830       "Cannot retrieve locked instance %s" % self.op.instance_name
4831     _CheckNodeOnline(self, instance.primary_node)
4832
4833     if instance.disk_template == constants.DT_DISKLESS:
4834       raise errors.OpPrereqError("Instance '%s' has no disks" %
4835                                  self.op.instance_name,
4836                                  errors.ECODE_INVAL)
4837     _CheckInstanceDown(self, instance, "cannot reinstall")
4838
4839     if self.op.os_type is not None:
4840       # OS verification
4841       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4842       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4843
4844     self.instance = instance
4845
4846   def Exec(self, feedback_fn):
4847     """Reinstall the instance.
4848
4849     """
4850     inst = self.instance
4851
4852     if self.op.os_type is not None:
4853       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854       inst.os = self.op.os_type
4855       self.cfg.Update(inst, feedback_fn)
4856
4857     _StartInstanceDisks(self, inst, None)
4858     try:
4859       feedback_fn("Running the instance OS create scripts...")
4860       # FIXME: pass debug option from opcode to backend
4861       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862                                              self.op.debug_level)
4863       result.Raise("Could not install OS for instance %s on node %s" %
4864                    (inst.name, inst.primary_node))
4865     finally:
4866       _ShutdownInstanceDisks(self, inst)
4867
4868
4869 class LURecreateInstanceDisks(LogicalUnit):
4870   """Recreate an instance's missing disks.
4871
4872   """
4873   HPATH = "instance-recreate-disks"
4874   HTYPE = constants.HTYPE_INSTANCE
4875   _OP_PARAMS = [
4876     _PInstanceName,
4877     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4878     ]
4879   REQ_BGL = False
4880
4881   def ExpandNames(self):
4882     self._ExpandAndLockInstance()
4883
4884   def BuildHooksEnv(self):
4885     """Build hooks env.
4886
4887     This runs on master, primary and secondary nodes of the instance.
4888
4889     """
4890     env = _BuildInstanceHookEnvByObject(self, self.instance)
4891     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4892     return env, nl, nl
4893
4894   def CheckPrereq(self):
4895     """Check prerequisites.
4896
4897     This checks that the instance is in the cluster and is not running.
4898
4899     """
4900     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4901     assert instance is not None, \
4902       "Cannot retrieve locked instance %s" % self.op.instance_name
4903     _CheckNodeOnline(self, instance.primary_node)
4904
4905     if instance.disk_template == constants.DT_DISKLESS:
4906       raise errors.OpPrereqError("Instance '%s' has no disks" %
4907                                  self.op.instance_name, errors.ECODE_INVAL)
4908     _CheckInstanceDown(self, instance, "cannot recreate disks")
4909
4910     if not self.op.disks:
4911       self.op.disks = range(len(instance.disks))
4912     else:
4913       for idx in self.op.disks:
4914         if idx >= len(instance.disks):
4915           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4916                                      errors.ECODE_INVAL)
4917
4918     self.instance = instance
4919
4920   def Exec(self, feedback_fn):
4921     """Recreate the disks.
4922
4923     """
4924     to_skip = []
4925     for idx, _ in enumerate(self.instance.disks):
4926       if idx not in self.op.disks: # disk idx has not been passed in
4927         to_skip.append(idx)
4928         continue
4929
4930     _CreateDisks(self, self.instance, to_skip=to_skip)
4931
4932
4933 class LURenameInstance(LogicalUnit):
4934   """Rename an instance.
4935
4936   """
4937   HPATH = "instance-rename"
4938   HTYPE = constants.HTYPE_INSTANCE
4939   _OP_PARAMS = [
4940     _PInstanceName,
4941     ("new_name", _NoDefault, _TNonEmptyString),
4942     ("ip_check", False, _TBool),
4943     ("name_check", True, _TBool),
4944     ]
4945
4946   def CheckArguments(self):
4947     """Check arguments.
4948
4949     """
4950     if self.op.ip_check and not self.op.name_check:
4951       # TODO: make the ip check more flexible and not depend on the name check
4952       raise errors.OpPrereqError("Cannot do ip check without a name check",
4953                                  errors.ECODE_INVAL)
4954
4955   def BuildHooksEnv(self):
4956     """Build hooks env.
4957
4958     This runs on master, primary and secondary nodes of the instance.
4959
4960     """
4961     env = _BuildInstanceHookEnvByObject(self, self.instance)
4962     env["INSTANCE_NEW_NAME"] = self.op.new_name
4963     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4964     return env, nl, nl
4965
4966   def CheckPrereq(self):
4967     """Check prerequisites.
4968
4969     This checks that the instance is in the cluster and is not running.
4970
4971     """
4972     self.op.instance_name = _ExpandInstanceName(self.cfg,
4973                                                 self.op.instance_name)
4974     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975     assert instance is not None
4976     _CheckNodeOnline(self, instance.primary_node)
4977     _CheckInstanceDown(self, instance, "cannot rename")
4978     self.instance = instance
4979
4980     new_name = self.op.new_name
4981     if self.op.name_check:
4982       hostname = netutils.GetHostname(name=new_name)
4983       new_name = self.op.new_name = hostname.name
4984       if (self.op.ip_check and
4985           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4986         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4987                                    (hostname.ip, new_name),
4988                                    errors.ECODE_NOTUNIQUE)
4989
4990     instance_list = self.cfg.GetInstanceList()
4991     if new_name in instance_list:
4992       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4993                                  new_name, errors.ECODE_EXISTS)
4994
4995   def Exec(self, feedback_fn):
4996     """Reinstall the instance.
4997
4998     """
4999     inst = self.instance
5000     old_name = inst.name
5001
5002     if inst.disk_template == constants.DT_FILE:
5003       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5004
5005     self.cfg.RenameInstance(inst.name, self.op.new_name)
5006     # Change the instance lock. This is definitely safe while we hold the BGL
5007     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5008     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5009
5010     # re-read the instance from the configuration after rename
5011     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5012
5013     if inst.disk_template == constants.DT_FILE:
5014       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5015       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5016                                                      old_file_storage_dir,
5017                                                      new_file_storage_dir)
5018       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5019                    " (but the instance has been renamed in Ganeti)" %
5020                    (inst.primary_node, old_file_storage_dir,
5021                     new_file_storage_dir))
5022
5023     _StartInstanceDisks(self, inst, None)
5024     try:
5025       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5026                                                  old_name, self.op.debug_level)
5027       msg = result.fail_msg
5028       if msg:
5029         msg = ("Could not run OS rename script for instance %s on node %s"
5030                " (but the instance has been renamed in Ganeti): %s" %
5031                (inst.name, inst.primary_node, msg))
5032         self.proc.LogWarning(msg)
5033     finally:
5034       _ShutdownInstanceDisks(self, inst)
5035
5036     return inst.name
5037
5038
5039 class LURemoveInstance(LogicalUnit):
5040   """Remove an instance.
5041
5042   """
5043   HPATH = "instance-remove"
5044   HTYPE = constants.HTYPE_INSTANCE
5045   _OP_PARAMS = [
5046     _PInstanceName,
5047     ("ignore_failures", False, _TBool),
5048     _PShutdownTimeout,
5049     ]
5050   REQ_BGL = False
5051
5052   def ExpandNames(self):
5053     self._ExpandAndLockInstance()
5054     self.needed_locks[locking.LEVEL_NODE] = []
5055     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5056
5057   def DeclareLocks(self, level):
5058     if level == locking.LEVEL_NODE:
5059       self._LockInstancesNodes()
5060
5061   def BuildHooksEnv(self):
5062     """Build hooks env.
5063
5064     This runs on master, primary and secondary nodes of the instance.
5065
5066     """
5067     env = _BuildInstanceHookEnvByObject(self, self.instance)
5068     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5069     nl = [self.cfg.GetMasterNode()]
5070     nl_post = list(self.instance.all_nodes) + nl
5071     return env, nl, nl_post
5072
5073   def CheckPrereq(self):
5074     """Check prerequisites.
5075
5076     This checks that the instance is in the cluster.
5077
5078     """
5079     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5080     assert self.instance is not None, \
5081       "Cannot retrieve locked instance %s" % self.op.instance_name
5082
5083   def Exec(self, feedback_fn):
5084     """Remove the instance.
5085
5086     """
5087     instance = self.instance
5088     logging.info("Shutting down instance %s on node %s",
5089                  instance.name, instance.primary_node)
5090
5091     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5092                                              self.op.shutdown_timeout)
5093     msg = result.fail_msg
5094     if msg:
5095       if self.op.ignore_failures:
5096         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5097       else:
5098         raise errors.OpExecError("Could not shutdown instance %s on"
5099                                  " node %s: %s" %
5100                                  (instance.name, instance.primary_node, msg))
5101
5102     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5103
5104
5105 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5106   """Utility function to remove an instance.
5107
5108   """
5109   logging.info("Removing block devices for instance %s", instance.name)
5110
5111   if not _RemoveDisks(lu, instance):
5112     if not ignore_failures:
5113       raise errors.OpExecError("Can't remove instance's disks")
5114     feedback_fn("Warning: can't remove instance's disks")
5115
5116   logging.info("Removing instance %s out of cluster config", instance.name)
5117
5118   lu.cfg.RemoveInstance(instance.name)
5119
5120   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5121     "Instance lock removal conflict"
5122
5123   # Remove lock for the instance
5124   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5125
5126
5127 class LUQueryInstances(NoHooksLU):
5128   """Logical unit for querying instances.
5129
5130   """
5131   # pylint: disable-msg=W0142
5132   _OP_PARAMS = [
5133     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5134     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5135     ("use_locking", False, _TBool),
5136     ]
5137   REQ_BGL = False
5138   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5139                     "serial_no", "ctime", "mtime", "uuid"]
5140   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5141                                     "admin_state",
5142                                     "disk_template", "ip", "mac", "bridge",
5143                                     "nic_mode", "nic_link",
5144                                     "sda_size", "sdb_size", "vcpus", "tags",
5145                                     "network_port", "beparams",
5146                                     r"(disk)\.(size)/([0-9]+)",
5147                                     r"(disk)\.(sizes)", "disk_usage",
5148                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5149                                     r"(nic)\.(bridge)/([0-9]+)",
5150                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5151                                     r"(disk|nic)\.(count)",
5152                                     "hvparams",
5153                                     ] + _SIMPLE_FIELDS +
5154                                   ["hv/%s" % name
5155                                    for name in constants.HVS_PARAMETERS
5156                                    if name not in constants.HVC_GLOBALS] +
5157                                   ["be/%s" % name
5158                                    for name in constants.BES_PARAMETERS])
5159   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5160                                    "oper_ram",
5161                                    "oper_vcpus",
5162                                    "status")
5163
5164
5165   def CheckArguments(self):
5166     _CheckOutputFields(static=self._FIELDS_STATIC,
5167                        dynamic=self._FIELDS_DYNAMIC,
5168                        selected=self.op.output_fields)
5169
5170   def ExpandNames(self):
5171     self.needed_locks = {}
5172     self.share_locks[locking.LEVEL_INSTANCE] = 1
5173     self.share_locks[locking.LEVEL_NODE] = 1
5174
5175     if self.op.names:
5176       self.wanted = _GetWantedInstances(self, self.op.names)
5177     else:
5178       self.wanted = locking.ALL_SET
5179
5180     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5181     self.do_locking = self.do_node_query and self.op.use_locking
5182     if self.do_locking:
5183       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5184       self.needed_locks[locking.LEVEL_NODE] = []
5185       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5186
5187   def DeclareLocks(self, level):
5188     if level == locking.LEVEL_NODE and self.do_locking:
5189       self._LockInstancesNodes()
5190
5191   def Exec(self, feedback_fn):
5192     """Computes the list of nodes and their attributes.
5193
5194     """
5195     # pylint: disable-msg=R0912
5196     # way too many branches here
5197     all_info = self.cfg.GetAllInstancesInfo()
5198     if self.wanted == locking.ALL_SET:
5199       # caller didn't specify instance names, so ordering is not important
5200       if self.do_locking:
5201         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5202       else:
5203         instance_names = all_info.keys()
5204       instance_names = utils.NiceSort(instance_names)
5205     else:
5206       # caller did specify names, so we must keep the ordering
5207       if self.do_locking:
5208         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5209       else:
5210         tgt_set = all_info.keys()
5211       missing = set(self.wanted).difference(tgt_set)
5212       if missing:
5213         raise errors.OpExecError("Some instances were removed before"
5214                                  " retrieving their data: %s" % missing)
5215       instance_names = self.wanted
5216
5217     instance_list = [all_info[iname] for iname in instance_names]
5218
5219     # begin data gathering
5220
5221     nodes = frozenset([inst.primary_node for inst in instance_list])
5222     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5223
5224     bad_nodes = []
5225     off_nodes = []
5226     if self.do_node_query:
5227       live_data = {}
5228       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5229       for name in nodes:
5230         result = node_data[name]
5231         if result.offline:
5232           # offline nodes will be in both lists
5233           off_nodes.append(name)
5234         if result.fail_msg:
5235           bad_nodes.append(name)
5236         else:
5237           if result.payload:
5238             live_data.update(result.payload)
5239           # else no instance is alive
5240     else:
5241       live_data = dict([(name, {}) for name in instance_names])
5242
5243     # end data gathering
5244
5245     HVPREFIX = "hv/"
5246     BEPREFIX = "be/"
5247     output = []
5248     cluster = self.cfg.GetClusterInfo()
5249     for instance in instance_list:
5250       iout = []
5251       i_hv = cluster.FillHV(instance, skip_globals=True)
5252       i_be = cluster.FillBE(instance)
5253       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5254       for field in self.op.output_fields:
5255         st_match = self._FIELDS_STATIC.Matches(field)
5256         if field in self._SIMPLE_FIELDS:
5257           val = getattr(instance, field)
5258         elif field == "pnode":
5259           val = instance.primary_node
5260         elif field == "snodes":
5261           val = list(instance.secondary_nodes)
5262         elif field == "admin_state":
5263           val = instance.admin_up
5264         elif field == "oper_state":
5265           if instance.primary_node in bad_nodes:
5266             val = None
5267           else:
5268             val = bool(live_data.get(instance.name))
5269         elif field == "status":
5270           if instance.primary_node in off_nodes:
5271             val = "ERROR_nodeoffline"
5272           elif instance.primary_node in bad_nodes:
5273             val = "ERROR_nodedown"
5274           else:
5275             running = bool(live_data.get(instance.name))
5276             if running:
5277               if instance.admin_up:
5278                 val = "running"
5279               else:
5280                 val = "ERROR_up"
5281             else:
5282               if instance.admin_up:
5283                 val = "ERROR_down"
5284               else:
5285                 val = "ADMIN_down"
5286         elif field == "oper_ram":
5287           if instance.primary_node in bad_nodes:
5288             val = None
5289           elif instance.name in live_data:
5290             val = live_data[instance.name].get("memory", "?")
5291           else:
5292             val = "-"
5293         elif field == "oper_vcpus":
5294           if instance.primary_node in bad_nodes:
5295             val = None
5296           elif instance.name in live_data:
5297             val = live_data[instance.name].get("vcpus", "?")
5298           else:
5299             val = "-"
5300         elif field == "vcpus":
5301           val = i_be[constants.BE_VCPUS]
5302         elif field == "disk_template":
5303           val = instance.disk_template
5304         elif field == "ip":
5305           if instance.nics:
5306             val = instance.nics[0].ip
5307           else:
5308             val = None
5309         elif field == "nic_mode":
5310           if instance.nics:
5311             val = i_nicp[0][constants.NIC_MODE]
5312           else:
5313             val = None
5314         elif field == "nic_link":
5315           if instance.nics:
5316             val = i_nicp[0][constants.NIC_LINK]
5317           else:
5318             val = None
5319         elif field == "bridge":
5320           if (instance.nics and
5321               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5322             val = i_nicp[0][constants.NIC_LINK]
5323           else:
5324             val = None
5325         elif field == "mac":
5326           if instance.nics:
5327             val = instance.nics[0].mac
5328           else:
5329             val = None
5330         elif field == "sda_size" or field == "sdb_size":
5331           idx = ord(field[2]) - ord('a')
5332           try:
5333             val = instance.FindDisk(idx).size
5334           except errors.OpPrereqError:
5335             val = None
5336         elif field == "disk_usage": # total disk usage per node
5337           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5338           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5339         elif field == "tags":
5340           val = list(instance.GetTags())
5341         elif field == "hvparams":
5342           val = i_hv
5343         elif (field.startswith(HVPREFIX) and
5344               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5345               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5346           val = i_hv.get(field[len(HVPREFIX):], None)
5347         elif field == "beparams":
5348           val = i_be
5349         elif (field.startswith(BEPREFIX) and
5350               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5351           val = i_be.get(field[len(BEPREFIX):], None)
5352         elif st_match and st_match.groups():
5353           # matches a variable list
5354           st_groups = st_match.groups()
5355           if st_groups and st_groups[0] == "disk":
5356             if st_groups[1] == "count":
5357               val = len(instance.disks)
5358             elif st_groups[1] == "sizes":
5359               val = [disk.size for disk in instance.disks]
5360             elif st_groups[1] == "size":
5361               try:
5362                 val = instance.FindDisk(st_groups[2]).size
5363               except errors.OpPrereqError:
5364                 val = None
5365             else:
5366               assert False, "Unhandled disk parameter"
5367           elif st_groups[0] == "nic":
5368             if st_groups[1] == "count":
5369               val = len(instance.nics)
5370             elif st_groups[1] == "macs":
5371               val = [nic.mac for nic in instance.nics]
5372             elif st_groups[1] == "ips":
5373               val = [nic.ip for nic in instance.nics]
5374             elif st_groups[1] == "modes":
5375               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5376             elif st_groups[1] == "links":
5377               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5378             elif st_groups[1] == "bridges":
5379               val = []
5380               for nicp in i_nicp:
5381                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5382                   val.append(nicp[constants.NIC_LINK])
5383                 else:
5384                   val.append(None)
5385             else:
5386               # index-based item
5387               nic_idx = int(st_groups[2])
5388               if nic_idx >= len(instance.nics):
5389                 val = None
5390               else:
5391                 if st_groups[1] == "mac":
5392                   val = instance.nics[nic_idx].mac
5393                 elif st_groups[1] == "ip":
5394                   val = instance.nics[nic_idx].ip
5395                 elif st_groups[1] == "mode":
5396                   val = i_nicp[nic_idx][constants.NIC_MODE]
5397                 elif st_groups[1] == "link":
5398                   val = i_nicp[nic_idx][constants.NIC_LINK]
5399                 elif st_groups[1] == "bridge":
5400                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5401                   if nic_mode == constants.NIC_MODE_BRIDGED:
5402                     val = i_nicp[nic_idx][constants.NIC_LINK]
5403                   else:
5404                     val = None
5405                 else:
5406                   assert False, "Unhandled NIC parameter"
5407           else:
5408             assert False, ("Declared but unhandled variable parameter '%s'" %
5409                            field)
5410         else:
5411           assert False, "Declared but unhandled parameter '%s'" % field
5412         iout.append(val)
5413       output.append(iout)
5414
5415     return output
5416
5417
5418 class LUFailoverInstance(LogicalUnit):
5419   """Failover an instance.
5420
5421   """
5422   HPATH = "instance-failover"
5423   HTYPE = constants.HTYPE_INSTANCE
5424   _OP_PARAMS = [
5425     _PInstanceName,
5426     ("ignore_consistency", False, _TBool),
5427     _PShutdownTimeout,
5428     ]
5429   REQ_BGL = False
5430
5431   def ExpandNames(self):
5432     self._ExpandAndLockInstance()
5433     self.needed_locks[locking.LEVEL_NODE] = []
5434     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5435
5436   def DeclareLocks(self, level):
5437     if level == locking.LEVEL_NODE:
5438       self._LockInstancesNodes()
5439
5440   def BuildHooksEnv(self):
5441     """Build hooks env.
5442
5443     This runs on master, primary and secondary nodes of the instance.
5444
5445     """
5446     instance = self.instance
5447     source_node = instance.primary_node
5448     target_node = instance.secondary_nodes[0]
5449     env = {
5450       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5451       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5452       "OLD_PRIMARY": source_node,
5453       "OLD_SECONDARY": target_node,
5454       "NEW_PRIMARY": target_node,
5455       "NEW_SECONDARY": source_node,
5456       }
5457     env.update(_BuildInstanceHookEnvByObject(self, instance))
5458     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5459     nl_post = list(nl)
5460     nl_post.append(source_node)
5461     return env, nl, nl_post
5462
5463   def CheckPrereq(self):
5464     """Check prerequisites.
5465
5466     This checks that the instance is in the cluster.
5467
5468     """
5469     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5470     assert self.instance is not None, \
5471       "Cannot retrieve locked instance %s" % self.op.instance_name
5472
5473     bep = self.cfg.GetClusterInfo().FillBE(instance)
5474     if instance.disk_template not in constants.DTS_NET_MIRROR:
5475       raise errors.OpPrereqError("Instance's disk layout is not"
5476                                  " network mirrored, cannot failover.",
5477                                  errors.ECODE_STATE)
5478
5479     secondary_nodes = instance.secondary_nodes
5480     if not secondary_nodes:
5481       raise errors.ProgrammerError("no secondary node but using "
5482                                    "a mirrored disk template")
5483
5484     target_node = secondary_nodes[0]
5485     _CheckNodeOnline(self, target_node)
5486     _CheckNodeNotDrained(self, target_node)
5487     if instance.admin_up:
5488       # check memory requirements on the secondary node
5489       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5490                            instance.name, bep[constants.BE_MEMORY],
5491                            instance.hypervisor)
5492     else:
5493       self.LogInfo("Not checking memory on the secondary node as"
5494                    " instance will not be started")
5495
5496     # check bridge existance
5497     _CheckInstanceBridgesExist(self, instance, node=target_node)
5498
5499   def Exec(self, feedback_fn):
5500     """Failover an instance.
5501
5502     The failover is done by shutting it down on its present node and
5503     starting it on the secondary.
5504
5505     """
5506     instance = self.instance
5507     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5508
5509     source_node = instance.primary_node
5510     target_node = instance.secondary_nodes[0]
5511
5512     if instance.admin_up:
5513       feedback_fn("* checking disk consistency between source and target")
5514       for dev in instance.disks:
5515         # for drbd, these are drbd over lvm
5516         if not _CheckDiskConsistency(self, dev, target_node, False):
5517           if not self.op.ignore_consistency:
5518             raise errors.OpExecError("Disk %s is degraded on target node,"
5519                                      " aborting failover." % dev.iv_name)
5520     else:
5521       feedback_fn("* not checking disk consistency as instance is not running")
5522
5523     feedback_fn("* shutting down instance on source node")
5524     logging.info("Shutting down instance %s on node %s",
5525                  instance.name, source_node)
5526
5527     result = self.rpc.call_instance_shutdown(source_node, instance,
5528                                              self.op.shutdown_timeout)
5529     msg = result.fail_msg
5530     if msg:
5531       if self.op.ignore_consistency or primary_node.offline:
5532         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533                              " Proceeding anyway. Please make sure node"
5534                              " %s is down. Error details: %s",
5535                              instance.name, source_node, source_node, msg)
5536       else:
5537         raise errors.OpExecError("Could not shutdown instance %s on"
5538                                  " node %s: %s" %
5539                                  (instance.name, source_node, msg))
5540
5541     feedback_fn("* deactivating the instance's disks on source node")
5542     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543       raise errors.OpExecError("Can't shut down the instance's disks.")
5544
5545     instance.primary_node = target_node
5546     # distribute new instance config to the other nodes
5547     self.cfg.Update(instance, feedback_fn)
5548
5549     # Only start the instance if it's marked as up
5550     if instance.admin_up:
5551       feedback_fn("* activating the instance's disks on target node")
5552       logging.info("Starting instance %s on node %s",
5553                    instance.name, target_node)
5554
5555       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556                                            ignore_secondaries=True)
5557       if not disks_ok:
5558         _ShutdownInstanceDisks(self, instance)
5559         raise errors.OpExecError("Can't activate the instance's disks")
5560
5561       feedback_fn("* starting the instance on the target node")
5562       result = self.rpc.call_instance_start(target_node, instance, None, None)
5563       msg = result.fail_msg
5564       if msg:
5565         _ShutdownInstanceDisks(self, instance)
5566         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567                                  (instance.name, target_node, msg))
5568
5569
5570 class LUMigrateInstance(LogicalUnit):
5571   """Migrate an instance.
5572
5573   This is migration without shutting down, compared to the failover,
5574   which is done with shutdown.
5575
5576   """
5577   HPATH = "instance-migrate"
5578   HTYPE = constants.HTYPE_INSTANCE
5579   _OP_PARAMS = [
5580     _PInstanceName,
5581     _PMigrationMode,
5582     _PMigrationLive,
5583     ("cleanup", False, _TBool),
5584     ]
5585
5586   REQ_BGL = False
5587
5588   def ExpandNames(self):
5589     self._ExpandAndLockInstance()
5590
5591     self.needed_locks[locking.LEVEL_NODE] = []
5592     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5593
5594     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5595                                        self.op.cleanup)
5596     self.tasklets = [self._migrater]
5597
5598   def DeclareLocks(self, level):
5599     if level == locking.LEVEL_NODE:
5600       self._LockInstancesNodes()
5601
5602   def BuildHooksEnv(self):
5603     """Build hooks env.
5604
5605     This runs on master, primary and secondary nodes of the instance.
5606
5607     """
5608     instance = self._migrater.instance
5609     source_node = instance.primary_node
5610     target_node = instance.secondary_nodes[0]
5611     env = _BuildInstanceHookEnvByObject(self, instance)
5612     env["MIGRATE_LIVE"] = self._migrater.live
5613     env["MIGRATE_CLEANUP"] = self.op.cleanup
5614     env.update({
5615         "OLD_PRIMARY": source_node,
5616         "OLD_SECONDARY": target_node,
5617         "NEW_PRIMARY": target_node,
5618         "NEW_SECONDARY": source_node,
5619         })
5620     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5621     nl_post = list(nl)
5622     nl_post.append(source_node)
5623     return env, nl, nl_post
5624
5625
5626 class LUMoveInstance(LogicalUnit):
5627   """Move an instance by data-copying.
5628
5629   """
5630   HPATH = "instance-move"
5631   HTYPE = constants.HTYPE_INSTANCE
5632   _OP_PARAMS = [
5633     _PInstanceName,
5634     ("target_node", _NoDefault, _TNonEmptyString),
5635     _PShutdownTimeout,
5636     ]
5637   REQ_BGL = False
5638
5639   def ExpandNames(self):
5640     self._ExpandAndLockInstance()
5641     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5642     self.op.target_node = target_node
5643     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5644     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5645
5646   def DeclareLocks(self, level):
5647     if level == locking.LEVEL_NODE:
5648       self._LockInstancesNodes(primary_only=True)
5649
5650   def BuildHooksEnv(self):
5651     """Build hooks env.
5652
5653     This runs on master, primary and secondary nodes of the instance.
5654
5655     """
5656     env = {
5657       "TARGET_NODE": self.op.target_node,
5658       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5659       }
5660     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662                                        self.op.target_node]
5663     return env, nl, nl
5664
5665   def CheckPrereq(self):
5666     """Check prerequisites.
5667
5668     This checks that the instance is in the cluster.
5669
5670     """
5671     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672     assert self.instance is not None, \
5673       "Cannot retrieve locked instance %s" % self.op.instance_name
5674
5675     node = self.cfg.GetNodeInfo(self.op.target_node)
5676     assert node is not None, \
5677       "Cannot retrieve locked node %s" % self.op.target_node
5678
5679     self.target_node = target_node = node.name
5680
5681     if target_node == instance.primary_node:
5682       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683                                  (instance.name, target_node),
5684                                  errors.ECODE_STATE)
5685
5686     bep = self.cfg.GetClusterInfo().FillBE(instance)
5687
5688     for idx, dsk in enumerate(instance.disks):
5689       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691                                    " cannot copy" % idx, errors.ECODE_STATE)
5692
5693     _CheckNodeOnline(self, target_node)
5694     _CheckNodeNotDrained(self, target_node)
5695
5696     if instance.admin_up:
5697       # check memory requirements on the secondary node
5698       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699                            instance.name, bep[constants.BE_MEMORY],
5700                            instance.hypervisor)
5701     else:
5702       self.LogInfo("Not checking memory on the secondary node as"
5703                    " instance will not be started")
5704
5705     # check bridge existance
5706     _CheckInstanceBridgesExist(self, instance, node=target_node)
5707
5708   def Exec(self, feedback_fn):
5709     """Move an instance.
5710
5711     The move is done by shutting it down on its present node, copying
5712     the data over (slow) and starting it on the new node.
5713
5714     """
5715     instance = self.instance
5716
5717     source_node = instance.primary_node
5718     target_node = self.target_node
5719
5720     self.LogInfo("Shutting down instance %s on source node %s",
5721                  instance.name, source_node)
5722
5723     result = self.rpc.call_instance_shutdown(source_node, instance,
5724                                              self.op.shutdown_timeout)
5725     msg = result.fail_msg
5726     if msg:
5727       if self.op.ignore_consistency:
5728         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729                              " Proceeding anyway. Please make sure node"
5730                              " %s is down. Error details: %s",
5731                              instance.name, source_node, source_node, msg)
5732       else:
5733         raise errors.OpExecError("Could not shutdown instance %s on"
5734                                  " node %s: %s" %
5735                                  (instance.name, source_node, msg))
5736
5737     # create the target disks
5738     try:
5739       _CreateDisks(self, instance, target_node=target_node)
5740     except errors.OpExecError:
5741       self.LogWarning("Device creation failed, reverting...")
5742       try:
5743         _RemoveDisks(self, instance, target_node=target_node)
5744       finally:
5745         self.cfg.ReleaseDRBDMinors(instance.name)
5746         raise
5747
5748     cluster_name = self.cfg.GetClusterInfo().cluster_name
5749
5750     errs = []
5751     # activate, get path, copy the data over
5752     for idx, disk in enumerate(instance.disks):
5753       self.LogInfo("Copying data for disk %d", idx)
5754       result = self.rpc.call_blockdev_assemble(target_node, disk,
5755                                                instance.name, True)
5756       if result.fail_msg:
5757         self.LogWarning("Can't assemble newly created disk %d: %s",
5758                         idx, result.fail_msg)
5759         errs.append(result.fail_msg)
5760         break
5761       dev_path = result.payload
5762       result = self.rpc.call_blockdev_export(source_node, disk,
5763                                              target_node, dev_path,
5764                                              cluster_name)
5765       if result.fail_msg:
5766         self.LogWarning("Can't copy data over for disk %d: %s",
5767                         idx, result.fail_msg)
5768         errs.append(result.fail_msg)
5769         break
5770
5771     if errs:
5772       self.LogWarning("Some disks failed to copy, aborting")
5773       try:
5774         _RemoveDisks(self, instance, target_node=target_node)
5775       finally:
5776         self.cfg.ReleaseDRBDMinors(instance.name)
5777         raise errors.OpExecError("Errors during disk copy: %s" %
5778                                  (",".join(errs),))
5779
5780     instance.primary_node = target_node
5781     self.cfg.Update(instance, feedback_fn)
5782
5783     self.LogInfo("Removing the disks on the original node")
5784     _RemoveDisks(self, instance, target_node=source_node)
5785
5786     # Only start the instance if it's marked as up
5787     if instance.admin_up:
5788       self.LogInfo("Starting instance %s on node %s",
5789                    instance.name, target_node)
5790
5791       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792                                            ignore_secondaries=True)
5793       if not disks_ok:
5794         _ShutdownInstanceDisks(self, instance)
5795         raise errors.OpExecError("Can't activate the instance's disks")
5796
5797       result = self.rpc.call_instance_start(target_node, instance, None, None)
5798       msg = result.fail_msg
5799       if msg:
5800         _ShutdownInstanceDisks(self, instance)
5801         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802                                  (instance.name, target_node, msg))
5803
5804
5805 class LUMigrateNode(LogicalUnit):
5806   """Migrate all instances from a node.
5807
5808   """
5809   HPATH = "node-migrate"
5810   HTYPE = constants.HTYPE_NODE
5811   _OP_PARAMS = [
5812     _PNodeName,
5813     _PMigrationMode,
5814     _PMigrationLive,
5815     ]
5816   REQ_BGL = False
5817
5818   def ExpandNames(self):
5819     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5820
5821     self.needed_locks = {
5822       locking.LEVEL_NODE: [self.op.node_name],
5823       }
5824
5825     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5826
5827     # Create tasklets for migrating instances for all instances on this node
5828     names = []
5829     tasklets = []
5830
5831     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5832       logging.debug("Migrating instance %s", inst.name)
5833       names.append(inst.name)
5834
5835       tasklets.append(TLMigrateInstance(self, inst.name, False))
5836
5837     self.tasklets = tasklets
5838
5839     # Declare instance locks
5840     self.needed_locks[locking.LEVEL_INSTANCE] = names
5841
5842   def DeclareLocks(self, level):
5843     if level == locking.LEVEL_NODE:
5844       self._LockInstancesNodes()
5845
5846   def BuildHooksEnv(self):
5847     """Build hooks env.
5848
5849     This runs on the master, the primary and all the secondaries.
5850
5851     """
5852     env = {
5853       "NODE_NAME": self.op.node_name,
5854       }
5855
5856     nl = [self.cfg.GetMasterNode()]
5857
5858     return (env, nl, nl)
5859
5860
5861 class TLMigrateInstance(Tasklet):
5862   """Tasklet class for instance migration.
5863
5864   @type live: boolean
5865   @ivar live: whether the migration will be done live or non-live;
5866       this variable is initalized only after CheckPrereq has run
5867
5868   """
5869   def __init__(self, lu, instance_name, cleanup):
5870     """Initializes this class.
5871
5872     """
5873     Tasklet.__init__(self, lu)
5874
5875     # Parameters
5876     self.instance_name = instance_name
5877     self.cleanup = cleanup
5878     self.live = False # will be overridden later
5879
5880   def CheckPrereq(self):
5881     """Check prerequisites.
5882
5883     This checks that the instance is in the cluster.
5884
5885     """
5886     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887     instance = self.cfg.GetInstanceInfo(instance_name)
5888     assert instance is not None
5889
5890     if instance.disk_template != constants.DT_DRBD8:
5891       raise errors.OpPrereqError("Instance's disk layout is not"
5892                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5893
5894     secondary_nodes = instance.secondary_nodes
5895     if not secondary_nodes:
5896       raise errors.ConfigurationError("No secondary node but using"
5897                                       " drbd8 disk template")
5898
5899     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5900
5901     target_node = secondary_nodes[0]
5902     # check memory requirements on the secondary node
5903     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904                          instance.name, i_be[constants.BE_MEMORY],
5905                          instance.hypervisor)
5906
5907     # check bridge existance
5908     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5909
5910     if not self.cleanup:
5911       _CheckNodeNotDrained(self.lu, target_node)
5912       result = self.rpc.call_instance_migratable(instance.primary_node,
5913                                                  instance)
5914       result.Raise("Can't migrate, please use failover",
5915                    prereq=True, ecode=errors.ECODE_STATE)
5916
5917     self.instance = instance
5918
5919     if self.lu.op.live is not None and self.lu.op.mode is not None:
5920       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921                                  " parameters are accepted",
5922                                  errors.ECODE_INVAL)
5923     if self.lu.op.live is not None:
5924       if self.lu.op.live:
5925         self.lu.op.mode = constants.HT_MIGRATION_LIVE
5926       else:
5927         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928       # reset the 'live' parameter to None so that repeated
5929       # invocations of CheckPrereq do not raise an exception
5930       self.lu.op.live = None
5931     elif self.lu.op.mode is None:
5932       # read the default value from the hypervisor
5933       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5935
5936     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5937
5938   def _WaitUntilSync(self):
5939     """Poll with custom rpc for disk sync.
5940
5941     This uses our own step-based rpc call.
5942
5943     """
5944     self.feedback_fn("* wait until resync is done")
5945     all_done = False
5946     while not all_done:
5947       all_done = True
5948       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5949                                             self.nodes_ip,
5950                                             self.instance.disks)
5951       min_percent = 100
5952       for node, nres in result.items():
5953         nres.Raise("Cannot resync disks on node %s" % node)
5954         node_done, node_percent = nres.payload
5955         all_done = all_done and node_done
5956         if node_percent is not None:
5957           min_percent = min(min_percent, node_percent)
5958       if not all_done:
5959         if min_percent < 100:
5960           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5961         time.sleep(2)
5962
5963   def _EnsureSecondary(self, node):
5964     """Demote a node to secondary.
5965
5966     """
5967     self.feedback_fn("* switching node %s to secondary mode" % node)
5968
5969     for dev in self.instance.disks:
5970       self.cfg.SetDiskID(dev, node)
5971
5972     result = self.rpc.call_blockdev_close(node, self.instance.name,
5973                                           self.instance.disks)
5974     result.Raise("Cannot change disk to secondary on node %s" % node)
5975
5976   def _GoStandalone(self):
5977     """Disconnect from the network.
5978
5979     """
5980     self.feedback_fn("* changing into standalone mode")
5981     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982                                                self.instance.disks)
5983     for node, nres in result.items():
5984       nres.Raise("Cannot disconnect disks node %s" % node)
5985
5986   def _GoReconnect(self, multimaster):
5987     """Reconnect to the network.
5988
5989     """
5990     if multimaster:
5991       msg = "dual-master"
5992     else:
5993       msg = "single-master"
5994     self.feedback_fn("* changing disks into %s mode" % msg)
5995     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996                                            self.instance.disks,
5997                                            self.instance.name, multimaster)
5998     for node, nres in result.items():
5999       nres.Raise("Cannot change disks config on node %s" % node)
6000
6001   def _ExecCleanup(self):
6002     """Try to cleanup after a failed migration.
6003
6004     The cleanup is done by:
6005       - check that the instance is running only on one node
6006         (and update the config if needed)
6007       - change disks on its secondary node to secondary
6008       - wait until disks are fully synchronized
6009       - disconnect from the network
6010       - change disks into single-master mode
6011       - wait again until disks are fully synchronized
6012
6013     """
6014     instance = self.instance
6015     target_node = self.target_node
6016     source_node = self.source_node
6017
6018     # check running on only one node
6019     self.feedback_fn("* checking where the instance actually runs"
6020                      " (if this hangs, the hypervisor might be in"
6021                      " a bad state)")
6022     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023     for node, result in ins_l.items():
6024       result.Raise("Can't contact node %s" % node)
6025
6026     runningon_source = instance.name in ins_l[source_node].payload
6027     runningon_target = instance.name in ins_l[target_node].payload
6028
6029     if runningon_source and runningon_target:
6030       raise errors.OpExecError("Instance seems to be running on two nodes,"
6031                                " or the hypervisor is confused. You will have"
6032                                " to ensure manually that it runs only on one"
6033                                " and restart this operation.")
6034
6035     if not (runningon_source or runningon_target):
6036       raise errors.OpExecError("Instance does not seem to be running at all."
6037                                " In this case, it's safer to repair by"
6038                                " running 'gnt-instance stop' to ensure disk"
6039                                " shutdown, and then restarting it.")
6040
6041     if runningon_target:
6042       # the migration has actually succeeded, we need to update the config
6043       self.feedback_fn("* instance running on secondary node (%s),"
6044                        " updating config" % target_node)
6045       instance.primary_node = target_node
6046       self.cfg.Update(instance, self.feedback_fn)
6047       demoted_node = source_node
6048     else:
6049       self.feedback_fn("* instance confirmed to be running on its"
6050                        " primary node (%s)" % source_node)
6051       demoted_node = target_node
6052
6053     self._EnsureSecondary(demoted_node)
6054     try:
6055       self._WaitUntilSync()
6056     except errors.OpExecError:
6057       # we ignore here errors, since if the device is standalone, it
6058       # won't be able to sync
6059       pass
6060     self._GoStandalone()
6061     self._GoReconnect(False)
6062     self._WaitUntilSync()
6063
6064     self.feedback_fn("* done")
6065
6066   def _RevertDiskStatus(self):
6067     """Try to revert the disk status after a failed migration.
6068
6069     """
6070     target_node = self.target_node
6071     try:
6072       self._EnsureSecondary(target_node)
6073       self._GoStandalone()
6074       self._GoReconnect(False)
6075       self._WaitUntilSync()
6076     except errors.OpExecError, err:
6077       self.lu.LogWarning("Migration failed and I can't reconnect the"
6078                          " drives: error '%s'\n"
6079                          "Please look and recover the instance status" %
6080                          str(err))
6081
6082   def _AbortMigration(self):
6083     """Call the hypervisor code to abort a started migration.
6084
6085     """
6086     instance = self.instance
6087     target_node = self.target_node
6088     migration_info = self.migration_info
6089
6090     abort_result = self.rpc.call_finalize_migration(target_node,
6091                                                     instance,
6092                                                     migration_info,
6093                                                     False)
6094     abort_msg = abort_result.fail_msg
6095     if abort_msg:
6096       logging.error("Aborting migration failed on target node %s: %s",
6097                     target_node, abort_msg)
6098       # Don't raise an exception here, as we stil have to try to revert the
6099       # disk status, even if this step failed.
6100
6101   def _ExecMigration(self):
6102     """Migrate an instance.
6103
6104     The migrate is done by:
6105       - change the disks into dual-master mode
6106       - wait until disks are fully synchronized again
6107       - migrate the instance
6108       - change disks on the new secondary node (the old primary) to secondary
6109       - wait until disks are fully synchronized
6110       - change disks into single-master mode
6111
6112     """
6113     instance = self.instance
6114     target_node = self.target_node
6115     source_node = self.source_node
6116
6117     self.feedback_fn("* checking disk consistency between source and target")
6118     for dev in instance.disks:
6119       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120         raise errors.OpExecError("Disk %s is degraded or not fully"
6121                                  " synchronized on target node,"
6122                                  " aborting migrate." % dev.iv_name)
6123
6124     # First get the migration information from the remote node
6125     result = self.rpc.call_migration_info(source_node, instance)
6126     msg = result.fail_msg
6127     if msg:
6128       log_err = ("Failed fetching source migration information from %s: %s" %
6129                  (source_node, msg))
6130       logging.error(log_err)
6131       raise errors.OpExecError(log_err)
6132
6133     self.migration_info = migration_info = result.payload
6134
6135     # Then switch the disks to master/master mode
6136     self._EnsureSecondary(target_node)
6137     self._GoStandalone()
6138     self._GoReconnect(True)
6139     self._WaitUntilSync()
6140
6141     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142     result = self.rpc.call_accept_instance(target_node,
6143                                            instance,
6144                                            migration_info,
6145                                            self.nodes_ip[target_node])
6146
6147     msg = result.fail_msg
6148     if msg:
6149       logging.error("Instance pre-migration failed, trying to revert"
6150                     " disk status: %s", msg)
6151       self.feedback_fn("Pre-migration failed, aborting")
6152       self._AbortMigration()
6153       self._RevertDiskStatus()
6154       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155                                (instance.name, msg))
6156
6157     self.feedback_fn("* migrating instance to %s" % target_node)
6158     time.sleep(10)
6159     result = self.rpc.call_instance_migrate(source_node, instance,
6160                                             self.nodes_ip[target_node],
6161                                             self.live)
6162     msg = result.fail_msg
6163     if msg:
6164       logging.error("Instance migration failed, trying to revert"
6165                     " disk status: %s", msg)
6166       self.feedback_fn("Migration failed, aborting")
6167       self._AbortMigration()
6168       self._RevertDiskStatus()
6169       raise errors.OpExecError("Could not migrate instance %s: %s" %
6170                                (instance.name, msg))
6171     time.sleep(10)
6172
6173     instance.primary_node = target_node
6174     # distribute new instance config to the other nodes
6175     self.cfg.Update(instance, self.feedback_fn)
6176
6177     result = self.rpc.call_finalize_migration(target_node,
6178                                               instance,
6179                                               migration_info,
6180                                               True)
6181     msg = result.fail_msg
6182     if msg:
6183       logging.error("Instance migration succeeded, but finalization failed:"
6184                     " %s", msg)
6185       raise errors.OpExecError("Could not finalize instance migration: %s" %
6186                                msg)
6187
6188     self._EnsureSecondary(source_node)
6189     self._WaitUntilSync()
6190     self._GoStandalone()
6191     self._GoReconnect(False)
6192     self._WaitUntilSync()
6193
6194     self.feedback_fn("* done")
6195
6196   def Exec(self, feedback_fn):
6197     """Perform the migration.
6198
6199     """
6200     feedback_fn("Migrating instance %s" % self.instance.name)
6201
6202     self.feedback_fn = feedback_fn
6203
6204     self.source_node = self.instance.primary_node
6205     self.target_node = self.instance.secondary_nodes[0]
6206     self.all_nodes = [self.source_node, self.target_node]
6207     self.nodes_ip = {
6208       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6210       }
6211
6212     if self.cleanup:
6213       return self._ExecCleanup()
6214     else:
6215       return self._ExecMigration()
6216
6217
6218 def _CreateBlockDev(lu, node, instance, device, force_create,
6219                     info, force_open):
6220   """Create a tree of block devices on a given node.
6221
6222   If this device type has to be created on secondaries, create it and
6223   all its children.
6224
6225   If not, just recurse to children keeping the same 'force' value.
6226
6227   @param lu: the lu on whose behalf we execute
6228   @param node: the node on which to create the device
6229   @type instance: L{objects.Instance}
6230   @param instance: the instance which owns the device
6231   @type device: L{objects.Disk}
6232   @param device: the device to create
6233   @type force_create: boolean
6234   @param force_create: whether to force creation of this device; this
6235       will be change to True whenever we find a device which has
6236       CreateOnSecondary() attribute
6237   @param info: the extra 'metadata' we should attach to the device
6238       (this will be represented as a LVM tag)
6239   @type force_open: boolean
6240   @param force_open: this parameter will be passes to the
6241       L{backend.BlockdevCreate} function where it specifies
6242       whether we run on primary or not, and it affects both
6243       the child assembly and the device own Open() execution
6244
6245   """
6246   if device.CreateOnSecondary():
6247     force_create = True
6248
6249   if device.children:
6250     for child in device.children:
6251       _CreateBlockDev(lu, node, instance, child, force_create,
6252                       info, force_open)
6253
6254   if not force_create:
6255     return
6256
6257   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6258
6259
6260 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261   """Create a single block device on a given node.
6262
6263   This will not recurse over children of the device, so they must be
6264   created in advance.
6265
6266   @param lu: the lu on whose behalf we execute
6267   @param node: the node on which to create the device
6268   @type instance: L{objects.Instance}
6269   @param instance: the instance which owns the device
6270   @type device: L{objects.Disk}
6271   @param device: the device to create
6272   @param info: the extra 'metadata' we should attach to the device
6273       (this will be represented as a LVM tag)
6274   @type force_open: boolean
6275   @param force_open: this parameter will be passes to the
6276       L{backend.BlockdevCreate} function where it specifies
6277       whether we run on primary or not, and it affects both
6278       the child assembly and the device own Open() execution
6279
6280   """
6281   lu.cfg.SetDiskID(device, node)
6282   result = lu.rpc.call_blockdev_create(node, device, device.size,
6283                                        instance.name, force_open, info)
6284   result.Raise("Can't create block device %s on"
6285                " node %s for instance %s" % (device, node, instance.name))
6286   if device.physical_id is None:
6287     device.physical_id = result.payload
6288
6289
6290 def _GenerateUniqueNames(lu, exts):
6291   """Generate a suitable LV name.
6292
6293   This will generate a logical volume name for the given instance.
6294
6295   """
6296   results = []
6297   for val in exts:
6298     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299     results.append("%s%s" % (new_id, val))
6300   return results
6301
6302
6303 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6304                          p_minor, s_minor):
6305   """Generate a drbd8 device complete with its children.
6306
6307   """
6308   port = lu.cfg.AllocatePort()
6309   vgname = lu.cfg.GetVGName()
6310   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312                           logical_id=(vgname, names[0]))
6313   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314                           logical_id=(vgname, names[1]))
6315   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316                           logical_id=(primary, secondary, port,
6317                                       p_minor, s_minor,
6318                                       shared_secret),
6319                           children=[dev_data, dev_meta],
6320                           iv_name=iv_name)
6321   return drbd_dev
6322
6323
6324 def _GenerateDiskTemplate(lu, template_name,
6325                           instance_name, primary_node,
6326                           secondary_nodes, disk_info,
6327                           file_storage_dir, file_driver,
6328                           base_index):
6329   """Generate the entire disk layout for a given template type.
6330
6331   """
6332   #TODO: compute space requirements
6333
6334   vgname = lu.cfg.GetVGName()
6335   disk_count = len(disk_info)
6336   disks = []
6337   if template_name == constants.DT_DISKLESS:
6338     pass
6339   elif template_name == constants.DT_PLAIN:
6340     if len(secondary_nodes) != 0:
6341       raise errors.ProgrammerError("Wrong template configuration")
6342
6343     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344                                       for i in range(disk_count)])
6345     for idx, disk in enumerate(disk_info):
6346       disk_index = idx + base_index
6347       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348                               logical_id=(vgname, names[idx]),
6349                               iv_name="disk/%d" % disk_index,
6350                               mode=disk["mode"])
6351       disks.append(disk_dev)
6352   elif template_name == constants.DT_DRBD8:
6353     if len(secondary_nodes) != 1:
6354       raise errors.ProgrammerError("Wrong template configuration")
6355     remote_node = secondary_nodes[0]
6356     minors = lu.cfg.AllocateDRBDMinor(
6357       [primary_node, remote_node] * len(disk_info), instance_name)
6358
6359     names = []
6360     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361                                                for i in range(disk_count)]):
6362       names.append(lv_prefix + "_data")
6363       names.append(lv_prefix + "_meta")
6364     for idx, disk in enumerate(disk_info):
6365       disk_index = idx + base_index
6366       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367                                       disk["size"], names[idx*2:idx*2+2],
6368                                       "disk/%d" % disk_index,
6369                                       minors[idx*2], minors[idx*2+1])
6370       disk_dev.mode = disk["mode"]
6371       disks.append(disk_dev)
6372   elif template_name == constants.DT_FILE:
6373     if len(secondary_nodes) != 0:
6374       raise errors.ProgrammerError("Wrong template configuration")
6375
6376     _RequireFileStorage()
6377
6378     for idx, disk in enumerate(disk_info):
6379       disk_index = idx + base_index
6380       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381                               iv_name="disk/%d" % disk_index,
6382                               logical_id=(file_driver,
6383                                           "%s/disk%d" % (file_storage_dir,
6384                                                          disk_index)),
6385                               mode=disk["mode"])
6386       disks.append(disk_dev)
6387   else:
6388     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6389   return disks
6390
6391
6392 def _GetInstanceInfoText(instance):
6393   """Compute that text that should be added to the disk's metadata.
6394
6395   """
6396   return "originstname+%s" % instance.name
6397
6398
6399 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400   """Create all disks for an instance.
6401
6402   This abstracts away some work from AddInstance.
6403
6404   @type lu: L{LogicalUnit}
6405   @param lu: the logical unit on whose behalf we execute
6406   @type instance: L{objects.Instance}
6407   @param instance: the instance whose disks we should create
6408   @type to_skip: list
6409   @param to_skip: list of indices to skip
6410   @type target_node: string
6411   @param target_node: if passed, overrides the target node for creation
6412   @rtype: boolean
6413   @return: the success of the creation
6414
6415   """
6416   info = _GetInstanceInfoText(instance)
6417   if target_node is None:
6418     pnode = instance.primary_node
6419     all_nodes = instance.all_nodes
6420   else:
6421     pnode = target_node
6422     all_nodes = [pnode]
6423
6424   if instance.disk_template == constants.DT_FILE:
6425     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6427
6428     result.Raise("Failed to create directory '%s' on"
6429                  " node %s" % (file_storage_dir, pnode))
6430
6431   # Note: this needs to be kept in sync with adding of disks in
6432   # LUSetInstanceParams
6433   for idx, device in enumerate(instance.disks):
6434     if to_skip and idx in to_skip:
6435       continue
6436     logging.info("Creating volume %s for instance %s",
6437                  device.iv_name, instance.name)
6438     #HARDCODE
6439     for node in all_nodes:
6440       f_create = node == pnode
6441       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6442
6443
6444 def _RemoveDisks(lu, instance, target_node=None):
6445   """Remove all disks for an instance.
6446
6447   This abstracts away some work from `AddInstance()` and
6448   `RemoveInstance()`. Note that in case some of the devices couldn't
6449   be removed, the removal will continue with the other ones (compare
6450   with `_CreateDisks()`).
6451
6452   @type lu: L{LogicalUnit}
6453   @param lu: the logical unit on whose behalf we execute
6454   @type instance: L{objects.Instance}
6455   @param instance: the instance whose disks we should remove
6456   @type target_node: string
6457   @param target_node: used to override the node on which to remove the disks
6458   @rtype: boolean
6459   @return: the success of the removal
6460
6461   """
6462   logging.info("Removing block devices for instance %s", instance.name)
6463
6464   all_result = True
6465   for device in instance.disks:
6466     if target_node:
6467       edata = [(target_node, device)]
6468     else:
6469       edata = device.ComputeNodeTree(instance.primary_node)
6470     for node, disk in edata:
6471       lu.cfg.SetDiskID(disk, node)
6472       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6473       if msg:
6474         lu.LogWarning("Could not remove block device %s on node %s,"
6475                       " continuing anyway: %s", device.iv_name, node, msg)
6476         all_result = False
6477
6478   if instance.disk_template == constants.DT_FILE:
6479     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6480     if target_node:
6481       tgt = target_node
6482     else:
6483       tgt = instance.primary_node
6484     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6485     if result.fail_msg:
6486       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487                     file_storage_dir, instance.primary_node, result.fail_msg)
6488       all_result = False
6489
6490   return all_result
6491
6492
6493 def _ComputeDiskSize(disk_template, disks):
6494   """Compute disk size requirements in the volume group
6495
6496   """
6497   # Required free disk space as a function of disk and swap space
6498   req_size_dict = {
6499     constants.DT_DISKLESS: None,
6500     constants.DT_PLAIN: sum(d["size"] for d in disks),
6501     # 128 MB are added for drbd metadata for each disk
6502     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503     constants.DT_FILE: None,
6504   }
6505
6506   if disk_template not in req_size_dict:
6507     raise errors.ProgrammerError("Disk template '%s' size requirement"
6508                                  " is unknown" %  disk_template)
6509
6510   return req_size_dict[disk_template]
6511
6512
6513 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514   """Hypervisor parameter validation.
6515
6516   This function abstract the hypervisor parameter validation to be
6517   used in both instance create and instance modify.
6518
6519   @type lu: L{LogicalUnit}
6520   @param lu: the logical unit for which we check
6521   @type nodenames: list
6522   @param nodenames: the list of nodes on which we should check
6523   @type hvname: string
6524   @param hvname: the name of the hypervisor we should use
6525   @type hvparams: dict
6526   @param hvparams: the parameters which we need to check
6527   @raise errors.OpPrereqError: if the parameters are not valid
6528
6529   """
6530   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6531                                                   hvname,
6532                                                   hvparams)
6533   for node in nodenames:
6534     info = hvinfo[node]
6535     if info.offline:
6536       continue
6537     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6538
6539
6540 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541   """OS parameters validation.
6542
6543   @type lu: L{LogicalUnit}
6544   @param lu: the logical unit for which we check
6545   @type required: boolean
6546   @param required: whether the validation should fail if the OS is not
6547       found
6548   @type nodenames: list
6549   @param nodenames: the list of nodes on which we should check
6550   @type osname: string
6551   @param osname: the name of the hypervisor we should use
6552   @type osparams: dict
6553   @param osparams: the parameters which we need to check
6554   @raise errors.OpPrereqError: if the parameters are not valid
6555
6556   """
6557   result = lu.rpc.call_os_validate(required, nodenames, osname,
6558                                    [constants.OS_VALIDATE_PARAMETERS],
6559                                    osparams)
6560   for node, nres in result.items():
6561     # we don't check for offline cases since this should be run only
6562     # against the master node and/or an instance's nodes
6563     nres.Raise("OS Parameters validation failed on node %s" % node)
6564     if not nres.payload:
6565       lu.LogInfo("OS %s not found on node %s, validation skipped",
6566                  osname, node)
6567
6568
6569 class LUCreateInstance(LogicalUnit):
6570   """Create an instance.
6571
6572   """
6573   HPATH = "instance-add"
6574   HTYPE = constants.HTYPE_INSTANCE
6575   _OP_PARAMS = [
6576     _PInstanceName,
6577     ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578     ("start", True, _TBool),
6579     ("wait_for_sync", True, _TBool),
6580     ("ip_check", True, _TBool),
6581     ("name_check", True, _TBool),
6582     ("disks", _NoDefault, _TListOf(_TDict)),
6583     ("nics", _NoDefault, _TListOf(_TDict)),
6584     ("hvparams", _EmptyDict, _TDict),
6585     ("beparams", _EmptyDict, _TDict),
6586     ("osparams", _EmptyDict, _TDict),
6587     ("no_install", None, _TMaybeBool),
6588     ("os_type", None, _TMaybeString),
6589     ("force_variant", False, _TBool),
6590     ("source_handshake", None, _TOr(_TList, _TNone)),
6591     ("source_x509_ca", None, _TMaybeString),
6592     ("source_instance_name", None, _TMaybeString),
6593     ("src_node", None, _TMaybeString),
6594     ("src_path", None, _TMaybeString),
6595     ("pnode", None, _TMaybeString),
6596     ("snode", None, _TMaybeString),
6597     ("iallocator", None, _TMaybeString),
6598     ("hypervisor", None, _TMaybeString),
6599     ("disk_template", _NoDefault, _CheckDiskTemplate),
6600     ("identify_defaults", False, _TBool),
6601     ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602     ("file_storage_dir", None, _TMaybeString),
6603     ]
6604   REQ_BGL = False
6605
6606   def CheckArguments(self):
6607     """Check arguments.
6608
6609     """
6610     # do not require name_check to ease forward/backward compatibility
6611     # for tools
6612     if self.op.no_install and self.op.start:
6613       self.LogInfo("No-installation mode selected, disabling startup")
6614       self.op.start = False
6615     # validate/normalize the instance name
6616     self.op.instance_name = \
6617       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6618
6619     if self.op.ip_check and not self.op.name_check:
6620       # TODO: make the ip check more flexible and not depend on the name check
6621       raise errors.OpPrereqError("Cannot do ip check without a name check",
6622                                  errors.ECODE_INVAL)
6623
6624     # check nics' parameter names
6625     for nic in self.op.nics:
6626       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6627
6628     # check disks. parameter names and consistent adopt/no-adopt strategy
6629     has_adopt = has_no_adopt = False
6630     for disk in self.op.disks:
6631       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6632       if "adopt" in disk:
6633         has_adopt = True
6634       else:
6635         has_no_adopt = True
6636     if has_adopt and has_no_adopt:
6637       raise errors.OpPrereqError("Either all disks are adopted or none is",
6638                                  errors.ECODE_INVAL)
6639     if has_adopt:
6640       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641         raise errors.OpPrereqError("Disk adoption is not supported for the"
6642                                    " '%s' disk template" %
6643                                    self.op.disk_template,
6644                                    errors.ECODE_INVAL)
6645       if self.op.iallocator is not None:
6646         raise errors.OpPrereqError("Disk adoption not allowed with an"
6647                                    " iallocator script", errors.ECODE_INVAL)
6648       if self.op.mode == constants.INSTANCE_IMPORT:
6649         raise errors.OpPrereqError("Disk adoption not allowed for"
6650                                    " instance import", errors.ECODE_INVAL)
6651
6652     self.adopt_disks = has_adopt
6653
6654     # instance name verification
6655     if self.op.name_check:
6656       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6657       self.op.instance_name = self.hostname1.name
6658       # used in CheckPrereq for ip ping check
6659       self.check_ip = self.hostname1.ip
6660     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6661       raise errors.OpPrereqError("Remote imports require names to be checked" %
6662                                  errors.ECODE_INVAL)
6663     else:
6664       self.check_ip = None
6665
6666     # file storage checks
6667     if (self.op.file_driver and
6668         not self.op.file_driver in constants.FILE_DRIVER):
6669       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6670                                  self.op.file_driver, errors.ECODE_INVAL)
6671
6672     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6673       raise errors.OpPrereqError("File storage directory path not absolute",
6674                                  errors.ECODE_INVAL)
6675
6676     ### Node/iallocator related checks
6677     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6678
6679     if self.op.pnode is not None:
6680       if self.op.disk_template in constants.DTS_NET_MIRROR:
6681         if self.op.snode is None:
6682           raise errors.OpPrereqError("The networked disk templates need"
6683                                      " a mirror node", errors.ECODE_INVAL)
6684       elif self.op.snode:
6685         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6686                         " template")
6687         self.op.snode = None
6688
6689     self._cds = _GetClusterDomainSecret()
6690
6691     if self.op.mode == constants.INSTANCE_IMPORT:
6692       # On import force_variant must be True, because if we forced it at
6693       # initial install, our only chance when importing it back is that it
6694       # works again!
6695       self.op.force_variant = True
6696
6697       if self.op.no_install:
6698         self.LogInfo("No-installation mode has no effect during import")
6699
6700     elif self.op.mode == constants.INSTANCE_CREATE:
6701       if self.op.os_type is None:
6702         raise errors.OpPrereqError("No guest OS specified",
6703                                    errors.ECODE_INVAL)
6704       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6705         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6706                                    " installation" % self.op.os_type,
6707                                    errors.ECODE_STATE)
6708       if self.op.disk_template is None:
6709         raise errors.OpPrereqError("No disk template specified",
6710                                    errors.ECODE_INVAL)
6711
6712     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6713       # Check handshake to ensure both clusters have the same domain secret
6714       src_handshake = self.op.source_handshake
6715       if not src_handshake:
6716         raise errors.OpPrereqError("Missing source handshake",
6717                                    errors.ECODE_INVAL)
6718
6719       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6720                                                            src_handshake)
6721       if errmsg:
6722         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6723                                    errors.ECODE_INVAL)
6724
6725       # Load and check source CA
6726       self.source_x509_ca_pem = self.op.source_x509_ca
6727       if not self.source_x509_ca_pem:
6728         raise errors.OpPrereqError("Missing source X509 CA",
6729                                    errors.ECODE_INVAL)
6730
6731       try:
6732         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6733                                                     self._cds)
6734       except OpenSSL.crypto.Error, err:
6735         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6736                                    (err, ), errors.ECODE_INVAL)
6737
6738       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6739       if errcode is not None:
6740         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6741                                    errors.ECODE_INVAL)
6742
6743       self.source_x509_ca = cert
6744
6745       src_instance_name = self.op.source_instance_name
6746       if not src_instance_name:
6747         raise errors.OpPrereqError("Missing source instance name",
6748                                    errors.ECODE_INVAL)
6749
6750       self.source_instance_name = \
6751           netutils.GetHostname(name=src_instance_name).name
6752
6753     else:
6754       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6755                                  self.op.mode, errors.ECODE_INVAL)
6756
6757   def ExpandNames(self):
6758     """ExpandNames for CreateInstance.
6759
6760     Figure out the right locks for instance creation.
6761
6762     """
6763     self.needed_locks = {}
6764
6765     instance_name = self.op.instance_name
6766     # this is just a preventive check, but someone might still add this
6767     # instance in the meantime, and creation will fail at lock-add time
6768     if instance_name in self.cfg.GetInstanceList():
6769       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6770                                  instance_name, errors.ECODE_EXISTS)
6771
6772     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6773
6774     if self.op.iallocator:
6775       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6776     else:
6777       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6778       nodelist = [self.op.pnode]
6779       if self.op.snode is not None:
6780         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6781         nodelist.append(self.op.snode)
6782       self.needed_locks[locking.LEVEL_NODE] = nodelist
6783
6784     # in case of import lock the source node too
6785     if self.op.mode == constants.INSTANCE_IMPORT:
6786       src_node = self.op.src_node
6787       src_path = self.op.src_path
6788
6789       if src_path is None:
6790         self.op.src_path = src_path = self.op.instance_name
6791
6792       if src_node is None:
6793         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6794         self.op.src_node = None
6795         if os.path.isabs(src_path):
6796           raise errors.OpPrereqError("Importing an instance from an absolute"
6797                                      " path requires a source node option.",
6798                                      errors.ECODE_INVAL)
6799       else:
6800         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6801         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6802           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6803         if not os.path.isabs(src_path):
6804           self.op.src_path = src_path = \
6805             utils.PathJoin(constants.EXPORT_DIR, src_path)
6806
6807   def _RunAllocator(self):
6808     """Run the allocator based on input opcode.
6809
6810     """
6811     nics = [n.ToDict() for n in self.nics]
6812     ial = IAllocator(self.cfg, self.rpc,
6813                      mode=constants.IALLOCATOR_MODE_ALLOC,
6814                      name=self.op.instance_name,
6815                      disk_template=self.op.disk_template,
6816                      tags=[],
6817                      os=self.op.os_type,
6818                      vcpus=self.be_full[constants.BE_VCPUS],
6819                      mem_size=self.be_full[constants.BE_MEMORY],
6820                      disks=self.disks,
6821                      nics=nics,
6822                      hypervisor=self.op.hypervisor,
6823                      )
6824
6825     ial.Run(self.op.iallocator)
6826
6827     if not ial.success:
6828       raise errors.OpPrereqError("Can't compute nodes using"
6829                                  " iallocator '%s': %s" %
6830                                  (self.op.iallocator, ial.info),
6831                                  errors.ECODE_NORES)
6832     if len(ial.result) != ial.required_nodes:
6833       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6834                                  " of nodes (%s), required %s" %
6835                                  (self.op.iallocator, len(ial.result),
6836                                   ial.required_nodes), errors.ECODE_FAULT)
6837     self.op.pnode = ial.result[0]
6838     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6839                  self.op.instance_name, self.op.iallocator,
6840                  utils.CommaJoin(ial.result))
6841     if ial.required_nodes == 2:
6842       self.op.snode = ial.result[1]
6843
6844   def BuildHooksEnv(self):
6845     """Build hooks env.
6846
6847     This runs on master, primary and secondary nodes of the instance.
6848
6849     """
6850     env = {
6851       "ADD_MODE": self.op.mode,
6852       }
6853     if self.op.mode == constants.INSTANCE_IMPORT:
6854       env["SRC_NODE"] = self.op.src_node
6855       env["SRC_PATH"] = self.op.src_path
6856       env["SRC_IMAGES"] = self.src_images
6857
6858     env.update(_BuildInstanceHookEnv(
6859       name=self.op.instance_name,
6860       primary_node=self.op.pnode,
6861       secondary_nodes=self.secondaries,
6862       status=self.op.start,
6863       os_type=self.op.os_type,
6864       memory=self.be_full[constants.BE_MEMORY],
6865       vcpus=self.be_full[constants.BE_VCPUS],
6866       nics=_NICListToTuple(self, self.nics),
6867       disk_template=self.op.disk_template,
6868       disks=[(d["size"], d["mode"]) for d in self.disks],
6869       bep=self.be_full,
6870       hvp=self.hv_full,
6871       hypervisor_name=self.op.hypervisor,
6872     ))
6873
6874     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6875           self.secondaries)
6876     return env, nl, nl
6877
6878   def _ReadExportInfo(self):
6879     """Reads the export information from disk.
6880
6881     It will override the opcode source node and path with the actual
6882     information, if these two were not specified before.
6883
6884     @return: the export information
6885
6886     """
6887     assert self.op.mode == constants.INSTANCE_IMPORT
6888
6889     src_node = self.op.src_node
6890     src_path = self.op.src_path
6891
6892     if src_node is None:
6893       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6894       exp_list = self.rpc.call_export_list(locked_nodes)
6895       found = False
6896       for node in exp_list:
6897         if exp_list[node].fail_msg:
6898           continue
6899         if src_path in exp_list[node].payload:
6900           found = True
6901           self.op.src_node = src_node = node
6902           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6903                                                        src_path)
6904           break
6905       if not found:
6906         raise errors.OpPrereqError("No export found for relative path %s" %
6907                                     src_path, errors.ECODE_INVAL)
6908
6909     _CheckNodeOnline(self, src_node)
6910     result = self.rpc.call_export_info(src_node, src_path)
6911     result.Raise("No export or invalid export found in dir %s" % src_path)
6912
6913     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6914     if not export_info.has_section(constants.INISECT_EXP):
6915       raise errors.ProgrammerError("Corrupted export config",
6916                                    errors.ECODE_ENVIRON)
6917
6918     ei_version = export_info.get(constants.INISECT_EXP, "version")
6919     if (int(ei_version) != constants.EXPORT_VERSION):
6920       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6921                                  (ei_version, constants.EXPORT_VERSION),
6922                                  errors.ECODE_ENVIRON)
6923     return export_info
6924
6925   def _ReadExportParams(self, einfo):
6926     """Use export parameters as defaults.
6927
6928     In case the opcode doesn't specify (as in override) some instance
6929     parameters, then try to use them from the export information, if
6930     that declares them.
6931
6932     """
6933     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6934
6935     if self.op.disk_template is None:
6936       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6937         self.op.disk_template = einfo.get(constants.INISECT_INS,
6938                                           "disk_template")
6939       else:
6940         raise errors.OpPrereqError("No disk template specified and the export"
6941                                    " is missing the disk_template information",
6942                                    errors.ECODE_INVAL)
6943
6944     if not self.op.disks:
6945       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6946         disks = []
6947         # TODO: import the disk iv_name too
6948         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6949           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6950           disks.append({"size": disk_sz})
6951         self.op.disks = disks
6952       else:
6953         raise errors.OpPrereqError("No disk info specified and the export"
6954                                    " is missing the disk information",
6955                                    errors.ECODE_INVAL)
6956
6957     if (not self.op.nics and
6958         einfo.has_option(constants.INISECT_INS, "nic_count")):
6959       nics = []
6960       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6961         ndict = {}
6962         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6963           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6964           ndict[name] = v
6965         nics.append(ndict)
6966       self.op.nics = nics
6967
6968     if (self.op.hypervisor is None and
6969         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6970       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6971     if einfo.has_section(constants.INISECT_HYP):
6972       # use the export parameters but do not override the ones
6973       # specified by the user
6974       for name, value in einfo.items(constants.INISECT_HYP):
6975         if name not in self.op.hvparams:
6976           self.op.hvparams[name] = value
6977
6978     if einfo.has_section(constants.INISECT_BEP):
6979       # use the parameters, without overriding
6980       for name, value in einfo.items(constants.INISECT_BEP):
6981         if name not in self.op.beparams:
6982           self.op.beparams[name] = value
6983     else:
6984       # try to read the parameters old style, from the main section
6985       for name in constants.BES_PARAMETERS:
6986         if (name not in self.op.beparams and
6987             einfo.has_option(constants.INISECT_INS, name)):
6988           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6989
6990     if einfo.has_section(constants.INISECT_OSP):
6991       # use the parameters, without overriding
6992       for name, value in einfo.items(constants.INISECT_OSP):
6993         if name not in self.op.osparams:
6994           self.op.osparams[name] = value
6995
6996   def _RevertToDefaults(self, cluster):
6997     """Revert the instance parameters to the default values.
6998
6999     """
7000     # hvparams
7001     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7002     for name in self.op.hvparams.keys():
7003       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7004         del self.op.hvparams[name]
7005     # beparams
7006     be_defs = cluster.SimpleFillBE({})
7007     for name in self.op.beparams.keys():
7008       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7009         del self.op.beparams[name]
7010     # nic params
7011     nic_defs = cluster.SimpleFillNIC({})
7012     for nic in self.op.nics:
7013       for name in constants.NICS_PARAMETERS:
7014         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7015           del nic[name]
7016     # osparams
7017     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7018     for name in self.op.osparams.keys():
7019       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7020         del self.op.osparams[name]
7021
7022   def CheckPrereq(self):
7023     """Check prerequisites.
7024
7025     """
7026     if self.op.mode == constants.INSTANCE_IMPORT:
7027       export_info = self._ReadExportInfo()
7028       self._ReadExportParams(export_info)
7029
7030     _CheckDiskTemplate(self.op.disk_template)
7031
7032     if (not self.cfg.GetVGName() and
7033         self.op.disk_template not in constants.DTS_NOT_LVM):
7034       raise errors.OpPrereqError("Cluster does not support lvm-based"
7035                                  " instances", errors.ECODE_STATE)
7036
7037     if self.op.hypervisor is None:
7038       self.op.hypervisor = self.cfg.GetHypervisorType()
7039
7040     cluster = self.cfg.GetClusterInfo()
7041     enabled_hvs = cluster.enabled_hypervisors
7042     if self.op.hypervisor not in enabled_hvs:
7043       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7044                                  " cluster (%s)" % (self.op.hypervisor,
7045                                   ",".join(enabled_hvs)),
7046                                  errors.ECODE_STATE)
7047
7048     # check hypervisor parameter syntax (locally)
7049     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7050     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7051                                       self.op.hvparams)
7052     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7053     hv_type.CheckParameterSyntax(filled_hvp)
7054     self.hv_full = filled_hvp
7055     # check that we don't specify global parameters on an instance
7056     _CheckGlobalHvParams(self.op.hvparams)
7057
7058     # fill and remember the beparams dict
7059     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7060     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7061
7062     # build os parameters
7063     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7064
7065     # now that hvp/bep are in final format, let's reset to defaults,
7066     # if told to do so
7067     if self.op.identify_defaults:
7068       self._RevertToDefaults(cluster)
7069
7070     # NIC buildup
7071     self.nics = []
7072     for idx, nic in enumerate(self.op.nics):
7073       nic_mode_req = nic.get("mode", None)
7074       nic_mode = nic_mode_req
7075       if nic_mode is None:
7076         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7077
7078       # in routed mode, for the first nic, the default ip is 'auto'
7079       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7080         default_ip_mode = constants.VALUE_AUTO
7081       else:
7082         default_ip_mode = constants.VALUE_NONE
7083
7084       # ip validity checks
7085       ip = nic.get("ip", default_ip_mode)
7086       if ip is None or ip.lower() == constants.VALUE_NONE:
7087         nic_ip = None
7088       elif ip.lower() == constants.VALUE_AUTO:
7089         if not self.op.name_check:
7090           raise errors.OpPrereqError("IP address set to auto but name checks"
7091                                      " have been skipped",
7092                                      errors.ECODE_INVAL)
7093         nic_ip = self.hostname1.ip
7094       else:
7095         if not netutils.IPAddress.IsValid(ip):
7096           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7097                                      errors.ECODE_INVAL)
7098         nic_ip = ip
7099
7100       # TODO: check the ip address for uniqueness
7101       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7102         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7103                                    errors.ECODE_INVAL)
7104
7105       # MAC address verification
7106       mac = nic.get("mac", constants.VALUE_AUTO)
7107       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7108         mac = utils.NormalizeAndValidateMac(mac)
7109
7110         try:
7111           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7112         except errors.ReservationError:
7113           raise errors.OpPrereqError("MAC address %s already in use"
7114                                      " in cluster" % mac,
7115                                      errors.ECODE_NOTUNIQUE)
7116
7117       # bridge verification
7118       bridge = nic.get("bridge", None)
7119       link = nic.get("link", None)
7120       if bridge and link:
7121         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7122                                    " at the same time", errors.ECODE_INVAL)
7123       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7124         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7125                                    errors.ECODE_INVAL)
7126       elif bridge:
7127         link = bridge
7128
7129       nicparams = {}
7130       if nic_mode_req:
7131         nicparams[constants.NIC_MODE] = nic_mode_req
7132       if link:
7133         nicparams[constants.NIC_LINK] = link
7134
7135       check_params = cluster.SimpleFillNIC(nicparams)
7136       objects.NIC.CheckParameterSyntax(check_params)
7137       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7138
7139     # disk checks/pre-build
7140     self.disks = []
7141     for disk in self.op.disks:
7142       mode = disk.get("mode", constants.DISK_RDWR)
7143       if mode not in constants.DISK_ACCESS_SET:
7144         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7145                                    mode, errors.ECODE_INVAL)
7146       size = disk.get("size", None)
7147       if size is None:
7148         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7149       try:
7150         size = int(size)
7151       except (TypeError, ValueError):
7152         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7153                                    errors.ECODE_INVAL)
7154       new_disk = {"size": size, "mode": mode}
7155       if "adopt" in disk:
7156         new_disk["adopt"] = disk["adopt"]
7157       self.disks.append(new_disk)
7158
7159     if self.op.mode == constants.INSTANCE_IMPORT:
7160
7161       # Check that the new instance doesn't have less disks than the export
7162       instance_disks = len(self.disks)
7163       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7164       if instance_disks < export_disks:
7165         raise errors.OpPrereqError("Not enough disks to import."
7166                                    " (instance: %d, export: %d)" %
7167                                    (instance_disks, export_disks),
7168                                    errors.ECODE_INVAL)
7169
7170       disk_images = []
7171       for idx in range(export_disks):
7172         option = 'disk%d_dump' % idx
7173         if export_info.has_option(constants.INISECT_INS, option):
7174           # FIXME: are the old os-es, disk sizes, etc. useful?
7175           export_name = export_info.get(constants.INISECT_INS, option)
7176           image = utils.PathJoin(self.op.src_path, export_name)
7177           disk_images.append(image)
7178         else:
7179           disk_images.append(False)
7180
7181       self.src_images = disk_images
7182
7183       old_name = export_info.get(constants.INISECT_INS, 'name')
7184       try:
7185         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7186       except (TypeError, ValueError), err:
7187         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7188                                    " an integer: %s" % str(err),
7189                                    errors.ECODE_STATE)
7190       if self.op.instance_name == old_name:
7191         for idx, nic in enumerate(self.nics):
7192           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7193             nic_mac_ini = 'nic%d_mac' % idx
7194             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7195
7196     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7197
7198     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7199     if self.op.ip_check:
7200       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7201         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7202                                    (self.check_ip, self.op.instance_name),
7203                                    errors.ECODE_NOTUNIQUE)
7204
7205     #### mac address generation
7206     # By generating here the mac address both the allocator and the hooks get
7207     # the real final mac address rather than the 'auto' or 'generate' value.
7208     # There is a race condition between the generation and the instance object
7209     # creation, which means that we know the mac is valid now, but we're not
7210     # sure it will be when we actually add the instance. If things go bad
7211     # adding the instance will abort because of a duplicate mac, and the
7212     # creation job will fail.
7213     for nic in self.nics:
7214       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7215         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7216
7217     #### allocator run
7218
7219     if self.op.iallocator is not None:
7220       self._RunAllocator()
7221
7222     #### node related checks
7223
7224     # check primary node
7225     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7226     assert self.pnode is not None, \
7227       "Cannot retrieve locked node %s" % self.op.pnode
7228     if pnode.offline:
7229       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7230                                  pnode.name, errors.ECODE_STATE)
7231     if pnode.drained:
7232       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7233                                  pnode.name, errors.ECODE_STATE)
7234
7235     self.secondaries = []
7236
7237     # mirror node verification
7238     if self.op.disk_template in constants.DTS_NET_MIRROR:
7239       if self.op.snode == pnode.name:
7240         raise errors.OpPrereqError("The secondary node cannot be the"
7241                                    " primary node.", errors.ECODE_INVAL)
7242       _CheckNodeOnline(self, self.op.snode)
7243       _CheckNodeNotDrained(self, self.op.snode)
7244       self.secondaries.append(self.op.snode)
7245
7246     nodenames = [pnode.name] + self.secondaries
7247
7248     req_size = _ComputeDiskSize(self.op.disk_template,
7249                                 self.disks)
7250
7251     # Check lv size requirements, if not adopting
7252     if req_size is not None and not self.adopt_disks:
7253       _CheckNodesFreeDisk(self, nodenames, req_size)
7254
7255     if self.adopt_disks: # instead, we must check the adoption data
7256       all_lvs = set([i["adopt"] for i in self.disks])
7257       if len(all_lvs) != len(self.disks):
7258         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7259                                    errors.ECODE_INVAL)
7260       for lv_name in all_lvs:
7261         try:
7262           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7263         except errors.ReservationError:
7264           raise errors.OpPrereqError("LV named %s used by another instance" %
7265                                      lv_name, errors.ECODE_NOTUNIQUE)
7266
7267       node_lvs = self.rpc.call_lv_list([pnode.name],
7268                                        self.cfg.GetVGName())[pnode.name]
7269       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7270       node_lvs = node_lvs.payload
7271       delta = all_lvs.difference(node_lvs.keys())
7272       if delta:
7273         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7274                                    utils.CommaJoin(delta),
7275                                    errors.ECODE_INVAL)
7276       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7277       if online_lvs:
7278         raise errors.OpPrereqError("Online logical volumes found, cannot"
7279                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7280                                    errors.ECODE_STATE)
7281       # update the size of disk based on what is found
7282       for dsk in self.disks:
7283         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7284
7285     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7286
7287     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7288     # check OS parameters (remotely)
7289     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7290
7291     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7292
7293     # memory check on primary node
7294     if self.op.start:
7295       _CheckNodeFreeMemory(self, self.pnode.name,
7296                            "creating instance %s" % self.op.instance_name,
7297                            self.be_full[constants.BE_MEMORY],
7298                            self.op.hypervisor)
7299
7300     self.dry_run_result = list(nodenames)
7301
7302   def Exec(self, feedback_fn):
7303     """Create and add the instance to the cluster.
7304
7305     """
7306     instance = self.op.instance_name
7307     pnode_name = self.pnode.name
7308
7309     ht_kind = self.op.hypervisor
7310     if ht_kind in constants.HTS_REQ_PORT:
7311       network_port = self.cfg.AllocatePort()
7312     else:
7313       network_port = None
7314
7315     if constants.ENABLE_FILE_STORAGE:
7316       # this is needed because os.path.join does not accept None arguments
7317       if self.op.file_storage_dir is None:
7318         string_file_storage_dir = ""
7319       else:
7320         string_file_storage_dir = self.op.file_storage_dir
7321
7322       # build the full file storage dir path
7323       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7324                                         string_file_storage_dir, instance)
7325     else:
7326       file_storage_dir = ""
7327
7328     disks = _GenerateDiskTemplate(self,
7329                                   self.op.disk_template,
7330                                   instance, pnode_name,
7331                                   self.secondaries,
7332                                   self.disks,
7333                                   file_storage_dir,
7334                                   self.op.file_driver,
7335                                   0)
7336
7337     iobj = objects.Instance(name=instance, os=self.op.os_type,
7338                             primary_node=pnode_name,
7339                             nics=self.nics, disks=disks,
7340                             disk_template=self.op.disk_template,
7341                             admin_up=False,
7342                             network_port=network_port,
7343                             beparams=self.op.beparams,
7344                             hvparams=self.op.hvparams,
7345                             hypervisor=self.op.hypervisor,
7346                             osparams=self.op.osparams,
7347                             )
7348
7349     if self.adopt_disks:
7350       # rename LVs to the newly-generated names; we need to construct
7351       # 'fake' LV disks with the old data, plus the new unique_id
7352       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7353       rename_to = []
7354       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7355         rename_to.append(t_dsk.logical_id)
7356         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7357         self.cfg.SetDiskID(t_dsk, pnode_name)
7358       result = self.rpc.call_blockdev_rename(pnode_name,
7359                                              zip(tmp_disks, rename_to))
7360       result.Raise("Failed to rename adoped LVs")
7361     else:
7362       feedback_fn("* creating instance disks...")
7363       try:
7364         _CreateDisks(self, iobj)
7365       except errors.OpExecError:
7366         self.LogWarning("Device creation failed, reverting...")
7367         try:
7368           _RemoveDisks(self, iobj)
7369         finally:
7370           self.cfg.ReleaseDRBDMinors(instance)
7371           raise
7372
7373     feedback_fn("adding instance %s to cluster config" % instance)
7374
7375     self.cfg.AddInstance(iobj, self.proc.GetECId())
7376
7377     # Declare that we don't want to remove the instance lock anymore, as we've
7378     # added the instance to the config
7379     del self.remove_locks[locking.LEVEL_INSTANCE]
7380     # Unlock all the nodes
7381     if self.op.mode == constants.INSTANCE_IMPORT:
7382       nodes_keep = [self.op.src_node]
7383       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7384                        if node != self.op.src_node]
7385       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7386       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7387     else:
7388       self.context.glm.release(locking.LEVEL_NODE)
7389       del self.acquired_locks[locking.LEVEL_NODE]
7390
7391     if self.op.wait_for_sync:
7392       disk_abort = not _WaitForSync(self, iobj)
7393     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7394       # make sure the disks are not degraded (still sync-ing is ok)
7395       time.sleep(15)
7396       feedback_fn("* checking mirrors status")
7397       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7398     else:
7399       disk_abort = False
7400
7401     if disk_abort:
7402       _RemoveDisks(self, iobj)
7403       self.cfg.RemoveInstance(iobj.name)
7404       # Make sure the instance lock gets removed
7405       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7406       raise errors.OpExecError("There are some degraded disks for"
7407                                " this instance")
7408
7409     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7410       if self.op.mode == constants.INSTANCE_CREATE:
7411         if not self.op.no_install:
7412           feedback_fn("* running the instance OS create scripts...")
7413           # FIXME: pass debug option from opcode to backend
7414           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7415                                                  self.op.debug_level)
7416           result.Raise("Could not add os for instance %s"
7417                        " on node %s" % (instance, pnode_name))
7418
7419       elif self.op.mode == constants.INSTANCE_IMPORT:
7420         feedback_fn("* running the instance OS import scripts...")
7421
7422         transfers = []
7423
7424         for idx, image in enumerate(self.src_images):
7425           if not image:
7426             continue
7427
7428           # FIXME: pass debug option from opcode to backend
7429           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7430                                              constants.IEIO_FILE, (image, ),
7431                                              constants.IEIO_SCRIPT,
7432                                              (iobj.disks[idx], idx),
7433                                              None)
7434           transfers.append(dt)
7435
7436         import_result = \
7437           masterd.instance.TransferInstanceData(self, feedback_fn,
7438                                                 self.op.src_node, pnode_name,
7439                                                 self.pnode.secondary_ip,
7440                                                 iobj, transfers)
7441         if not compat.all(import_result):
7442           self.LogWarning("Some disks for instance %s on node %s were not"
7443                           " imported successfully" % (instance, pnode_name))
7444
7445       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7446         feedback_fn("* preparing remote import...")
7447         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7448         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7449
7450         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7451                                                      self.source_x509_ca,
7452                                                      self._cds, timeouts)
7453         if not compat.all(disk_results):
7454           # TODO: Should the instance still be started, even if some disks
7455           # failed to import (valid for local imports, too)?
7456           self.LogWarning("Some disks for instance %s on node %s were not"
7457                           " imported successfully" % (instance, pnode_name))
7458
7459         # Run rename script on newly imported instance
7460         assert iobj.name == instance
7461         feedback_fn("Running rename script for %s" % instance)
7462         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7463                                                    self.source_instance_name,
7464                                                    self.op.debug_level)
7465         if result.fail_msg:
7466           self.LogWarning("Failed to run rename script for %s on node"
7467                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7468
7469       else:
7470         # also checked in the prereq part
7471         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7472                                      % self.op.mode)
7473
7474     if self.op.start:
7475       iobj.admin_up = True
7476       self.cfg.Update(iobj, feedback_fn)
7477       logging.info("Starting instance %s on node %s", instance, pnode_name)
7478       feedback_fn("* starting instance...")
7479       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7480       result.Raise("Could not start instance")
7481
7482     return list(iobj.all_nodes)
7483
7484
7485 class LUConnectConsole(NoHooksLU):
7486   """Connect to an instance's console.
7487
7488   This is somewhat special in that it returns the command line that
7489   you need to run on the master node in order to connect to the
7490   console.
7491
7492   """
7493   _OP_PARAMS = [
7494     _PInstanceName
7495     ]
7496   REQ_BGL = False
7497
7498   def ExpandNames(self):
7499     self._ExpandAndLockInstance()
7500
7501   def CheckPrereq(self):
7502     """Check prerequisites.
7503
7504     This checks that the instance is in the cluster.
7505
7506     """
7507     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7508     assert self.instance is not None, \
7509       "Cannot retrieve locked instance %s" % self.op.instance_name
7510     _CheckNodeOnline(self, self.instance.primary_node)
7511
7512   def Exec(self, feedback_fn):
7513     """Connect to the console of an instance
7514
7515     """
7516     instance = self.instance
7517     node = instance.primary_node
7518
7519     node_insts = self.rpc.call_instance_list([node],
7520                                              [instance.hypervisor])[node]
7521     node_insts.Raise("Can't get node information from %s" % node)
7522
7523     if instance.name not in node_insts.payload:
7524       raise errors.OpExecError("Instance %s is not running." % instance.name)
7525
7526     logging.debug("Connecting to console of %s on %s", instance.name, node)
7527
7528     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7529     cluster = self.cfg.GetClusterInfo()
7530     # beparams and hvparams are passed separately, to avoid editing the
7531     # instance and then saving the defaults in the instance itself.
7532     hvparams = cluster.FillHV(instance)
7533     beparams = cluster.FillBE(instance)
7534     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7535
7536     # build ssh cmdline
7537     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7538
7539
7540 class LUReplaceDisks(LogicalUnit):
7541   """Replace the disks of an instance.
7542
7543   """
7544   HPATH = "mirrors-replace"
7545   HTYPE = constants.HTYPE_INSTANCE
7546   _OP_PARAMS = [
7547     _PInstanceName,
7548     ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7549     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7550     ("remote_node", None, _TMaybeString),
7551     ("iallocator", None, _TMaybeString),
7552     ("early_release", False, _TBool),
7553     ]
7554   REQ_BGL = False
7555
7556   def CheckArguments(self):
7557     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7558                                   self.op.iallocator)
7559
7560   def ExpandNames(self):
7561     self._ExpandAndLockInstance()
7562
7563     if self.op.iallocator is not None:
7564       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7565
7566     elif self.op.remote_node is not None:
7567       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7568       self.op.remote_node = remote_node
7569
7570       # Warning: do not remove the locking of the new secondary here
7571       # unless DRBD8.AddChildren is changed to work in parallel;
7572       # currently it doesn't since parallel invocations of
7573       # FindUnusedMinor will conflict
7574       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7575       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7576
7577     else:
7578       self.needed_locks[locking.LEVEL_NODE] = []
7579       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7580
7581     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7582                                    self.op.iallocator, self.op.remote_node,
7583                                    self.op.disks, False, self.op.early_release)
7584
7585     self.tasklets = [self.replacer]
7586
7587   def DeclareLocks(self, level):
7588     # If we're not already locking all nodes in the set we have to declare the
7589     # instance's primary/secondary nodes.
7590     if (level == locking.LEVEL_NODE and
7591         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7592       self._LockInstancesNodes()
7593
7594   def BuildHooksEnv(self):
7595     """Build hooks env.
7596
7597     This runs on the master, the primary and all the secondaries.
7598
7599     """
7600     instance = self.replacer.instance
7601     env = {
7602       "MODE": self.op.mode,
7603       "NEW_SECONDARY": self.op.remote_node,
7604       "OLD_SECONDARY": instance.secondary_nodes[0],
7605       }
7606     env.update(_BuildInstanceHookEnvByObject(self, instance))
7607     nl = [
7608       self.cfg.GetMasterNode(),
7609       instance.primary_node,
7610       ]
7611     if self.op.remote_node is not None:
7612       nl.append(self.op.remote_node)
7613     return env, nl, nl
7614
7615
7616 class TLReplaceDisks(Tasklet):
7617   """Replaces disks for an instance.
7618
7619   Note: Locking is not within the scope of this class.
7620
7621   """
7622   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7623                disks, delay_iallocator, early_release):
7624     """Initializes this class.
7625
7626     """
7627     Tasklet.__init__(self, lu)
7628
7629     # Parameters
7630     self.instance_name = instance_name
7631     self.mode = mode
7632     self.iallocator_name = iallocator_name
7633     self.remote_node = remote_node
7634     self.disks = disks
7635     self.delay_iallocator = delay_iallocator
7636     self.early_release = early_release
7637
7638     # Runtime data
7639     self.instance = None
7640     self.new_node = None
7641     self.target_node = None
7642     self.other_node = None
7643     self.remote_node_info = None
7644     self.node_secondary_ip = None
7645
7646   @staticmethod
7647   def CheckArguments(mode, remote_node, iallocator):
7648     """Helper function for users of this class.
7649
7650     """
7651     # check for valid parameter combination
7652     if mode == constants.REPLACE_DISK_CHG:
7653       if remote_node is None and iallocator is None:
7654         raise errors.OpPrereqError("When changing the secondary either an"
7655                                    " iallocator script must be used or the"
7656                                    " new node given", errors.ECODE_INVAL)
7657
7658       if remote_node is not None and iallocator is not None:
7659         raise errors.OpPrereqError("Give either the iallocator or the new"
7660                                    " secondary, not both", errors.ECODE_INVAL)
7661
7662     elif remote_node is not None or iallocator is not None:
7663       # Not replacing the secondary
7664       raise errors.OpPrereqError("The iallocator and new node options can"
7665                                  " only be used when changing the"
7666                                  " secondary node", errors.ECODE_INVAL)
7667
7668   @staticmethod
7669   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7670     """Compute a new secondary node using an IAllocator.
7671
7672     """
7673     ial = IAllocator(lu.cfg, lu.rpc,
7674                      mode=constants.IALLOCATOR_MODE_RELOC,
7675                      name=instance_name,
7676                      relocate_from=relocate_from)
7677
7678     ial.Run(iallocator_name)
7679
7680     if not ial.success:
7681       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682                                  " %s" % (iallocator_name, ial.info),
7683                                  errors.ECODE_NORES)
7684
7685     if len(ial.result) != ial.required_nodes:
7686       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7687                                  " of nodes (%s), required %s" %
7688                                  (iallocator_name,
7689                                   len(ial.result), ial.required_nodes),
7690                                  errors.ECODE_FAULT)
7691
7692     remote_node_name = ial.result[0]
7693
7694     lu.LogInfo("Selected new secondary for instance '%s': %s",
7695                instance_name, remote_node_name)
7696
7697     return remote_node_name
7698
7699   def _FindFaultyDisks(self, node_name):
7700     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7701                                     node_name, True)
7702
7703   def CheckPrereq(self):
7704     """Check prerequisites.
7705
7706     This checks that the instance is in the cluster.
7707
7708     """
7709     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7710     assert instance is not None, \
7711       "Cannot retrieve locked instance %s" % self.instance_name
7712
7713     if instance.disk_template != constants.DT_DRBD8:
7714       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7715                                  " instances", errors.ECODE_INVAL)
7716
7717     if len(instance.secondary_nodes) != 1:
7718       raise errors.OpPrereqError("The instance has a strange layout,"
7719                                  " expected one secondary but found %d" %
7720                                  len(instance.secondary_nodes),
7721                                  errors.ECODE_FAULT)
7722
7723     if not self.delay_iallocator:
7724       self._CheckPrereq2()
7725
7726   def _CheckPrereq2(self):
7727     """Check prerequisites, second part.
7728
7729     This function should always be part of CheckPrereq. It was separated and is
7730     now called from Exec because during node evacuation iallocator was only
7731     called with an unmodified cluster model, not taking planned changes into
7732     account.
7733
7734     """
7735     instance = self.instance
7736     secondary_node = instance.secondary_nodes[0]
7737
7738     if self.iallocator_name is None:
7739       remote_node = self.remote_node
7740     else:
7741       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7742                                        instance.name, instance.secondary_nodes)
7743
7744     if remote_node is not None:
7745       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7746       assert self.remote_node_info is not None, \
7747         "Cannot retrieve locked node %s" % remote_node
7748     else:
7749       self.remote_node_info = None
7750
7751     if remote_node == self.instance.primary_node:
7752       raise errors.OpPrereqError("The specified node is the primary node of"
7753                                  " the instance.", errors.ECODE_INVAL)
7754
7755     if remote_node == secondary_node:
7756       raise errors.OpPrereqError("The specified node is already the"
7757                                  " secondary node of the instance.",
7758                                  errors.ECODE_INVAL)
7759
7760     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7761                                     constants.REPLACE_DISK_CHG):
7762       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7763                                  errors.ECODE_INVAL)
7764
7765     if self.mode == constants.REPLACE_DISK_AUTO:
7766       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7767       faulty_secondary = self._FindFaultyDisks(secondary_node)
7768
7769       if faulty_primary and faulty_secondary:
7770         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7771                                    " one node and can not be repaired"
7772                                    " automatically" % self.instance_name,
7773                                    errors.ECODE_STATE)
7774
7775       if faulty_primary:
7776         self.disks = faulty_primary
7777         self.target_node = instance.primary_node
7778         self.other_node = secondary_node
7779         check_nodes = [self.target_node, self.other_node]
7780       elif faulty_secondary:
7781         self.disks = faulty_secondary
7782         self.target_node = secondary_node
7783         self.other_node = instance.primary_node
7784         check_nodes = [self.target_node, self.other_node]
7785       else:
7786         self.disks = []
7787         check_nodes = []
7788
7789     else:
7790       # Non-automatic modes
7791       if self.mode == constants.REPLACE_DISK_PRI:
7792         self.target_node = instance.primary_node
7793         self.other_node = secondary_node
7794         check_nodes = [self.target_node, self.other_node]
7795
7796       elif self.mode == constants.REPLACE_DISK_SEC:
7797         self.target_node = secondary_node
7798         self.other_node = instance.primary_node
7799         check_nodes = [self.target_node, self.other_node]
7800
7801       elif self.mode == constants.REPLACE_DISK_CHG:
7802         self.new_node = remote_node
7803         self.other_node = instance.primary_node
7804         self.target_node = secondary_node
7805         check_nodes = [self.new_node, self.other_node]
7806
7807         _CheckNodeNotDrained(self.lu, remote_node)
7808
7809         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7810         assert old_node_info is not None
7811         if old_node_info.offline and not self.early_release:
7812           # doesn't make sense to delay the release
7813           self.early_release = True
7814           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7815                           " early-release mode", secondary_node)
7816
7817       else:
7818         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7819                                      self.mode)
7820
7821       # If not specified all disks should be replaced
7822       if not self.disks:
7823         self.disks = range(len(self.instance.disks))
7824
7825     for node in check_nodes:
7826       _CheckNodeOnline(self.lu, node)
7827
7828     # Check whether disks are valid
7829     for disk_idx in self.disks:
7830       instance.FindDisk(disk_idx)
7831
7832     # Get secondary node IP addresses
7833     node_2nd_ip = {}
7834
7835     for node_name in [self.target_node, self.other_node, self.new_node]:
7836       if node_name is not None:
7837         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7838
7839     self.node_secondary_ip = node_2nd_ip
7840
7841   def Exec(self, feedback_fn):
7842     """Execute disk replacement.
7843
7844     This dispatches the disk replacement to the appropriate handler.
7845
7846     """
7847     if self.delay_iallocator:
7848       self._CheckPrereq2()
7849
7850     if not self.disks:
7851       feedback_fn("No disks need replacement")
7852       return
7853
7854     feedback_fn("Replacing disk(s) %s for %s" %
7855                 (utils.CommaJoin(self.disks), self.instance.name))
7856
7857     activate_disks = (not self.instance.admin_up)
7858
7859     # Activate the instance disks if we're replacing them on a down instance
7860     if activate_disks:
7861       _StartInstanceDisks(self.lu, self.instance, True)
7862
7863     try:
7864       # Should we replace the secondary node?
7865       if self.new_node is not None:
7866         fn = self._ExecDrbd8Secondary
7867       else:
7868         fn = self._ExecDrbd8DiskOnly
7869
7870       return fn(feedback_fn)
7871
7872     finally:
7873       # Deactivate the instance disks if we're replacing them on a
7874       # down instance
7875       if activate_disks:
7876         _SafeShutdownInstanceDisks(self.lu, self.instance)
7877
7878   def _CheckVolumeGroup(self, nodes):
7879     self.lu.LogInfo("Checking volume groups")
7880
7881     vgname = self.cfg.GetVGName()
7882
7883     # Make sure volume group exists on all involved nodes
7884     results = self.rpc.call_vg_list(nodes)
7885     if not results:
7886       raise errors.OpExecError("Can't list volume groups on the nodes")
7887
7888     for node in nodes:
7889       res = results[node]
7890       res.Raise("Error checking node %s" % node)
7891       if vgname not in res.payload:
7892         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7893                                  (vgname, node))
7894
7895   def _CheckDisksExistence(self, nodes):
7896     # Check disk existence
7897     for idx, dev in enumerate(self.instance.disks):
7898       if idx not in self.disks:
7899         continue
7900
7901       for node in nodes:
7902         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7903         self.cfg.SetDiskID(dev, node)
7904
7905         result = self.rpc.call_blockdev_find(node, dev)
7906
7907         msg = result.fail_msg
7908         if msg or not result.payload:
7909           if not msg:
7910             msg = "disk not found"
7911           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7912                                    (idx, node, msg))
7913
7914   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7915     for idx, dev in enumerate(self.instance.disks):
7916       if idx not in self.disks:
7917         continue
7918
7919       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7920                       (idx, node_name))
7921
7922       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7923                                    ldisk=ldisk):
7924         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7925                                  " replace disks for instance %s" %
7926                                  (node_name, self.instance.name))
7927
7928   def _CreateNewStorage(self, node_name):
7929     vgname = self.cfg.GetVGName()
7930     iv_names = {}
7931
7932     for idx, dev in enumerate(self.instance.disks):
7933       if idx not in self.disks:
7934         continue
7935
7936       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7937
7938       self.cfg.SetDiskID(dev, node_name)
7939
7940       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7941       names = _GenerateUniqueNames(self.lu, lv_names)
7942
7943       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7944                              logical_id=(vgname, names[0]))
7945       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7946                              logical_id=(vgname, names[1]))
7947
7948       new_lvs = [lv_data, lv_meta]
7949       old_lvs = dev.children
7950       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7951
7952       # we pass force_create=True to force the LVM creation
7953       for new_lv in new_lvs:
7954         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7955                         _GetInstanceInfoText(self.instance), False)
7956
7957     return iv_names
7958
7959   def _CheckDevices(self, node_name, iv_names):
7960     for name, (dev, _, _) in iv_names.iteritems():
7961       self.cfg.SetDiskID(dev, node_name)
7962
7963       result = self.rpc.call_blockdev_find(node_name, dev)
7964
7965       msg = result.fail_msg
7966       if msg or not result.payload:
7967         if not msg:
7968           msg = "disk not found"
7969         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7970                                  (name, msg))
7971
7972       if result.payload.is_degraded:
7973         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7974
7975   def _RemoveOldStorage(self, node_name, iv_names):
7976     for name, (_, old_lvs, _) in iv_names.iteritems():
7977       self.lu.LogInfo("Remove logical volumes for %s" % name)
7978
7979       for lv in old_lvs:
7980         self.cfg.SetDiskID(lv, node_name)
7981
7982         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7983         if msg:
7984           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7985                              hint="remove unused LVs manually")
7986
7987   def _ReleaseNodeLock(self, node_name):
7988     """Releases the lock for a given node."""
7989     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7990
7991   def _ExecDrbd8DiskOnly(self, feedback_fn):
7992     """Replace a disk on the primary or secondary for DRBD 8.
7993
7994     The algorithm for replace is quite complicated:
7995
7996       1. for each disk to be replaced:
7997
7998         1. create new LVs on the target node with unique names
7999         1. detach old LVs from the drbd device
8000         1. rename old LVs to name_replaced.<time_t>
8001         1. rename new LVs to old LVs
8002         1. attach the new LVs (with the old names now) to the drbd device
8003
8004       1. wait for sync across all devices
8005
8006       1. for each modified disk:
8007
8008         1. remove old LVs (which have the name name_replaces.<time_t>)
8009
8010     Failures are not very well handled.
8011
8012     """
8013     steps_total = 6
8014
8015     # Step: check device activation
8016     self.lu.LogStep(1, steps_total, "Check device existence")
8017     self._CheckDisksExistence([self.other_node, self.target_node])
8018     self._CheckVolumeGroup([self.target_node, self.other_node])
8019
8020     # Step: check other node consistency
8021     self.lu.LogStep(2, steps_total, "Check peer consistency")
8022     self._CheckDisksConsistency(self.other_node,
8023                                 self.other_node == self.instance.primary_node,
8024                                 False)
8025
8026     # Step: create new storage
8027     self.lu.LogStep(3, steps_total, "Allocate new storage")
8028     iv_names = self._CreateNewStorage(self.target_node)
8029
8030     # Step: for each lv, detach+rename*2+attach
8031     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8032     for dev, old_lvs, new_lvs in iv_names.itervalues():
8033       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8034
8035       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8036                                                      old_lvs)
8037       result.Raise("Can't detach drbd from local storage on node"
8038                    " %s for device %s" % (self.target_node, dev.iv_name))
8039       #dev.children = []
8040       #cfg.Update(instance)
8041
8042       # ok, we created the new LVs, so now we know we have the needed
8043       # storage; as such, we proceed on the target node to rename
8044       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8045       # using the assumption that logical_id == physical_id (which in
8046       # turn is the unique_id on that node)
8047
8048       # FIXME(iustin): use a better name for the replaced LVs
8049       temp_suffix = int(time.time())
8050       ren_fn = lambda d, suff: (d.physical_id[0],
8051                                 d.physical_id[1] + "_replaced-%s" % suff)
8052
8053       # Build the rename list based on what LVs exist on the node
8054       rename_old_to_new = []
8055       for to_ren in old_lvs:
8056         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8057         if not result.fail_msg and result.payload:
8058           # device exists
8059           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8060
8061       self.lu.LogInfo("Renaming the old LVs on the target node")
8062       result = self.rpc.call_blockdev_rename(self.target_node,
8063                                              rename_old_to_new)
8064       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8065
8066       # Now we rename the new LVs to the old LVs
8067       self.lu.LogInfo("Renaming the new LVs on the target node")
8068       rename_new_to_old = [(new, old.physical_id)
8069                            for old, new in zip(old_lvs, new_lvs)]
8070       result = self.rpc.call_blockdev_rename(self.target_node,
8071                                              rename_new_to_old)
8072       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8073
8074       for old, new in zip(old_lvs, new_lvs):
8075         new.logical_id = old.logical_id
8076         self.cfg.SetDiskID(new, self.target_node)
8077
8078       for disk in old_lvs:
8079         disk.logical_id = ren_fn(disk, temp_suffix)
8080         self.cfg.SetDiskID(disk, self.target_node)
8081
8082       # Now that the new lvs have the old name, we can add them to the device
8083       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8084       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8085                                                   new_lvs)
8086       msg = result.fail_msg
8087       if msg:
8088         for new_lv in new_lvs:
8089           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8090                                                new_lv).fail_msg
8091           if msg2:
8092             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8093                                hint=("cleanup manually the unused logical"
8094                                      "volumes"))
8095         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8096
8097       dev.children = new_lvs
8098
8099       self.cfg.Update(self.instance, feedback_fn)
8100
8101     cstep = 5
8102     if self.early_release:
8103       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8104       cstep += 1
8105       self._RemoveOldStorage(self.target_node, iv_names)
8106       # WARNING: we release both node locks here, do not do other RPCs
8107       # than WaitForSync to the primary node
8108       self._ReleaseNodeLock([self.target_node, self.other_node])
8109
8110     # Wait for sync
8111     # This can fail as the old devices are degraded and _WaitForSync
8112     # does a combined result over all disks, so we don't check its return value
8113     self.lu.LogStep(cstep, steps_total, "Sync devices")
8114     cstep += 1
8115     _WaitForSync(self.lu, self.instance)
8116
8117     # Check all devices manually
8118     self._CheckDevices(self.instance.primary_node, iv_names)
8119
8120     # Step: remove old storage
8121     if not self.early_release:
8122       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8123       cstep += 1
8124       self._RemoveOldStorage(self.target_node, iv_names)
8125
8126   def _ExecDrbd8Secondary(self, feedback_fn):
8127     """Replace the secondary node for DRBD 8.
8128
8129     The algorithm for replace is quite complicated:
8130       - for all disks of the instance:
8131         - create new LVs on the new node with same names
8132         - shutdown the drbd device on the old secondary
8133         - disconnect the drbd network on the primary
8134         - create the drbd device on the new secondary
8135         - network attach the drbd on the primary, using an artifice:
8136           the drbd code for Attach() will connect to the network if it
8137           finds a device which is connected to the good local disks but
8138           not network enabled
8139       - wait for sync across all devices
8140       - remove all disks from the old secondary
8141
8142     Failures are not very well handled.
8143
8144     """
8145     steps_total = 6
8146
8147     # Step: check device activation
8148     self.lu.LogStep(1, steps_total, "Check device existence")
8149     self._CheckDisksExistence([self.instance.primary_node])
8150     self._CheckVolumeGroup([self.instance.primary_node])
8151
8152     # Step: check other node consistency
8153     self.lu.LogStep(2, steps_total, "Check peer consistency")
8154     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8155
8156     # Step: create new storage
8157     self.lu.LogStep(3, steps_total, "Allocate new storage")
8158     for idx, dev in enumerate(self.instance.disks):
8159       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8160                       (self.new_node, idx))
8161       # we pass force_create=True to force LVM creation
8162       for new_lv in dev.children:
8163         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8164                         _GetInstanceInfoText(self.instance), False)
8165
8166     # Step 4: dbrd minors and drbd setups changes
8167     # after this, we must manually remove the drbd minors on both the
8168     # error and the success paths
8169     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8170     minors = self.cfg.AllocateDRBDMinor([self.new_node
8171                                          for dev in self.instance.disks],
8172                                         self.instance.name)
8173     logging.debug("Allocated minors %r", minors)
8174
8175     iv_names = {}
8176     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8177       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8178                       (self.new_node, idx))
8179       # create new devices on new_node; note that we create two IDs:
8180       # one without port, so the drbd will be activated without
8181       # networking information on the new node at this stage, and one
8182       # with network, for the latter activation in step 4
8183       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8184       if self.instance.primary_node == o_node1:
8185         p_minor = o_minor1
8186       else:
8187         assert self.instance.primary_node == o_node2, "Three-node instance?"
8188         p_minor = o_minor2
8189
8190       new_alone_id = (self.instance.primary_node, self.new_node, None,
8191                       p_minor, new_minor, o_secret)
8192       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8193                     p_minor, new_minor, o_secret)
8194
8195       iv_names[idx] = (dev, dev.children, new_net_id)
8196       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8197                     new_net_id)
8198       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8199                               logical_id=new_alone_id,
8200                               children=dev.children,
8201                               size=dev.size)
8202       try:
8203         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8204                               _GetInstanceInfoText(self.instance), False)
8205       except errors.GenericError:
8206         self.cfg.ReleaseDRBDMinors(self.instance.name)
8207         raise
8208
8209     # We have new devices, shutdown the drbd on the old secondary
8210     for idx, dev in enumerate(self.instance.disks):
8211       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8212       self.cfg.SetDiskID(dev, self.target_node)
8213       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8214       if msg:
8215         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8216                            "node: %s" % (idx, msg),
8217                            hint=("Please cleanup this device manually as"
8218                                  " soon as possible"))
8219
8220     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8221     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8222                                                self.node_secondary_ip,
8223                                                self.instance.disks)\
8224                                               [self.instance.primary_node]
8225
8226     msg = result.fail_msg
8227     if msg:
8228       # detaches didn't succeed (unlikely)
8229       self.cfg.ReleaseDRBDMinors(self.instance.name)
8230       raise errors.OpExecError("Can't detach the disks from the network on"
8231                                " old node: %s" % (msg,))
8232
8233     # if we managed to detach at least one, we update all the disks of
8234     # the instance to point to the new secondary
8235     self.lu.LogInfo("Updating instance configuration")
8236     for dev, _, new_logical_id in iv_names.itervalues():
8237       dev.logical_id = new_logical_id
8238       self.cfg.SetDiskID(dev, self.instance.primary_node)
8239
8240     self.cfg.Update(self.instance, feedback_fn)
8241
8242     # and now perform the drbd attach
8243     self.lu.LogInfo("Attaching primary drbds to new secondary"
8244                     " (standalone => connected)")
8245     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8246                                             self.new_node],
8247                                            self.node_secondary_ip,
8248                                            self.instance.disks,
8249                                            self.instance.name,
8250                                            False)
8251     for to_node, to_result in result.items():
8252       msg = to_result.fail_msg
8253       if msg:
8254         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8255                            to_node, msg,
8256                            hint=("please do a gnt-instance info to see the"
8257                                  " status of disks"))
8258     cstep = 5
8259     if self.early_release:
8260       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8261       cstep += 1
8262       self._RemoveOldStorage(self.target_node, iv_names)
8263       # WARNING: we release all node locks here, do not do other RPCs
8264       # than WaitForSync to the primary node
8265       self._ReleaseNodeLock([self.instance.primary_node,
8266                              self.target_node,
8267                              self.new_node])
8268
8269     # Wait for sync
8270     # This can fail as the old devices are degraded and _WaitForSync
8271     # does a combined result over all disks, so we don't check its return value
8272     self.lu.LogStep(cstep, steps_total, "Sync devices")
8273     cstep += 1
8274     _WaitForSync(self.lu, self.instance)
8275
8276     # Check all devices manually
8277     self._CheckDevices(self.instance.primary_node, iv_names)
8278
8279     # Step: remove old storage
8280     if not self.early_release:
8281       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8282       self._RemoveOldStorage(self.target_node, iv_names)
8283
8284
8285 class LURepairNodeStorage(NoHooksLU):
8286   """Repairs the volume group on a node.
8287
8288   """
8289   _OP_PARAMS = [
8290     _PNodeName,
8291     ("storage_type", _NoDefault, _CheckStorageType),
8292     ("name", _NoDefault, _TNonEmptyString),
8293     ("ignore_consistency", False, _TBool),
8294     ]
8295   REQ_BGL = False
8296
8297   def CheckArguments(self):
8298     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8299
8300     storage_type = self.op.storage_type
8301
8302     if (constants.SO_FIX_CONSISTENCY not in
8303         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8304       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8305                                  " repaired" % storage_type,
8306                                  errors.ECODE_INVAL)
8307
8308   def ExpandNames(self):
8309     self.needed_locks = {
8310       locking.LEVEL_NODE: [self.op.node_name],
8311       }
8312
8313   def _CheckFaultyDisks(self, instance, node_name):
8314     """Ensure faulty disks abort the opcode or at least warn."""
8315     try:
8316       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8317                                   node_name, True):
8318         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8319                                    " node '%s'" % (instance.name, node_name),
8320                                    errors.ECODE_STATE)
8321     except errors.OpPrereqError, err:
8322       if self.op.ignore_consistency:
8323         self.proc.LogWarning(str(err.args[0]))
8324       else:
8325         raise
8326
8327   def CheckPrereq(self):
8328     """Check prerequisites.
8329
8330     """
8331     # Check whether any instance on this node has faulty disks
8332     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8333       if not inst.admin_up:
8334         continue
8335       check_nodes = set(inst.all_nodes)
8336       check_nodes.discard(self.op.node_name)
8337       for inst_node_name in check_nodes:
8338         self._CheckFaultyDisks(inst, inst_node_name)
8339
8340   def Exec(self, feedback_fn):
8341     feedback_fn("Repairing storage unit '%s' on %s ..." %
8342                 (self.op.name, self.op.node_name))
8343
8344     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8345     result = self.rpc.call_storage_execute(self.op.node_name,
8346                                            self.op.storage_type, st_args,
8347                                            self.op.name,
8348                                            constants.SO_FIX_CONSISTENCY)
8349     result.Raise("Failed to repair storage unit '%s' on %s" %
8350                  (self.op.name, self.op.node_name))
8351
8352
8353 class LUNodeEvacuationStrategy(NoHooksLU):
8354   """Computes the node evacuation strategy.
8355
8356   """
8357   _OP_PARAMS = [
8358     ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8359     ("remote_node", None, _TMaybeString),
8360     ("iallocator", None, _TMaybeString),
8361     ]
8362   REQ_BGL = False
8363
8364   def CheckArguments(self):
8365     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8366
8367   def ExpandNames(self):
8368     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8369     self.needed_locks = locks = {}
8370     if self.op.remote_node is None:
8371       locks[locking.LEVEL_NODE] = locking.ALL_SET
8372     else:
8373       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8374       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8375
8376   def Exec(self, feedback_fn):
8377     if self.op.remote_node is not None:
8378       instances = []
8379       for node in self.op.nodes:
8380         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8381       result = []
8382       for i in instances:
8383         if i.primary_node == self.op.remote_node:
8384           raise errors.OpPrereqError("Node %s is the primary node of"
8385                                      " instance %s, cannot use it as"
8386                                      " secondary" %
8387                                      (self.op.remote_node, i.name),
8388                                      errors.ECODE_INVAL)
8389         result.append([i.name, self.op.remote_node])
8390     else:
8391       ial = IAllocator(self.cfg, self.rpc,
8392                        mode=constants.IALLOCATOR_MODE_MEVAC,
8393                        evac_nodes=self.op.nodes)
8394       ial.Run(self.op.iallocator, validate=True)
8395       if not ial.success:
8396         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8397                                  errors.ECODE_NORES)
8398       result = ial.result
8399     return result
8400
8401
8402 class LUGrowDisk(LogicalUnit):
8403   """Grow a disk of an instance.
8404
8405   """
8406   HPATH = "disk-grow"
8407   HTYPE = constants.HTYPE_INSTANCE
8408   _OP_PARAMS = [
8409     _PInstanceName,
8410     ("disk", _NoDefault, _TInt),
8411     ("amount", _NoDefault, _TInt),
8412     ("wait_for_sync", True, _TBool),
8413     ]
8414   REQ_BGL = False
8415
8416   def ExpandNames(self):
8417     self._ExpandAndLockInstance()
8418     self.needed_locks[locking.LEVEL_NODE] = []
8419     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8420
8421   def DeclareLocks(self, level):
8422     if level == locking.LEVEL_NODE:
8423       self._LockInstancesNodes()
8424
8425   def BuildHooksEnv(self):
8426     """Build hooks env.
8427
8428     This runs on the master, the primary and all the secondaries.
8429
8430     """
8431     env = {
8432       "DISK": self.op.disk,
8433       "AMOUNT": self.op.amount,
8434       }
8435     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8436     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8437     return env, nl, nl
8438
8439   def CheckPrereq(self):
8440     """Check prerequisites.
8441
8442     This checks that the instance is in the cluster.
8443
8444     """
8445     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446     assert instance is not None, \
8447       "Cannot retrieve locked instance %s" % self.op.instance_name
8448     nodenames = list(instance.all_nodes)
8449     for node in nodenames:
8450       _CheckNodeOnline(self, node)
8451
8452     self.instance = instance
8453
8454     if instance.disk_template not in constants.DTS_GROWABLE:
8455       raise errors.OpPrereqError("Instance's disk layout does not support"
8456                                  " growing.", errors.ECODE_INVAL)
8457
8458     self.disk = instance.FindDisk(self.op.disk)
8459
8460     if instance.disk_template != constants.DT_FILE:
8461       # TODO: check the free disk space for file, when that feature will be
8462       # supported
8463       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8464
8465   def Exec(self, feedback_fn):
8466     """Execute disk grow.
8467
8468     """
8469     instance = self.instance
8470     disk = self.disk
8471
8472     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8473     if not disks_ok:
8474       raise errors.OpExecError("Cannot activate block device to grow")
8475
8476     for node in instance.all_nodes:
8477       self.cfg.SetDiskID(disk, node)
8478       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8479       result.Raise("Grow request failed to node %s" % node)
8480
8481       # TODO: Rewrite code to work properly
8482       # DRBD goes into sync mode for a short amount of time after executing the
8483       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8484       # calling "resize" in sync mode fails. Sleeping for a short amount of
8485       # time is a work-around.
8486       time.sleep(5)
8487
8488     disk.RecordGrow(self.op.amount)
8489     self.cfg.Update(instance, feedback_fn)
8490     if self.op.wait_for_sync:
8491       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8492       if disk_abort:
8493         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8494                              " status.\nPlease check the instance.")
8495       if not instance.admin_up:
8496         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8497     elif not instance.admin_up:
8498       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8499                            " not supposed to be running because no wait for"
8500                            " sync mode was requested.")
8501
8502
8503 class LUQueryInstanceData(NoHooksLU):
8504   """Query runtime instance data.
8505
8506   """
8507   _OP_PARAMS = [
8508     ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8509     ("static", False, _TBool),
8510     ]
8511   REQ_BGL = False
8512
8513   def ExpandNames(self):
8514     self.needed_locks = {}
8515     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8516
8517     if self.op.instances:
8518       self.wanted_names = []
8519       for name in self.op.instances:
8520         full_name = _ExpandInstanceName(self.cfg, name)
8521         self.wanted_names.append(full_name)
8522       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8523     else:
8524       self.wanted_names = None
8525       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8526
8527     self.needed_locks[locking.LEVEL_NODE] = []
8528     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8529
8530   def DeclareLocks(self, level):
8531     if level == locking.LEVEL_NODE:
8532       self._LockInstancesNodes()
8533
8534   def CheckPrereq(self):
8535     """Check prerequisites.
8536
8537     This only checks the optional instance list against the existing names.
8538
8539     """
8540     if self.wanted_names is None:
8541       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8542
8543     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8544                              in self.wanted_names]
8545
8546   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8547     """Returns the status of a block device
8548
8549     """
8550     if self.op.static or not node:
8551       return None
8552
8553     self.cfg.SetDiskID(dev, node)
8554
8555     result = self.rpc.call_blockdev_find(node, dev)
8556     if result.offline:
8557       return None
8558
8559     result.Raise("Can't compute disk status for %s" % instance_name)
8560
8561     status = result.payload
8562     if status is None:
8563       return None
8564
8565     return (status.dev_path, status.major, status.minor,
8566             status.sync_percent, status.estimated_time,
8567             status.is_degraded, status.ldisk_status)
8568
8569   def _ComputeDiskStatus(self, instance, snode, dev):
8570     """Compute block device status.
8571
8572     """
8573     if dev.dev_type in constants.LDS_DRBD:
8574       # we change the snode then (otherwise we use the one passed in)
8575       if dev.logical_id[0] == instance.primary_node:
8576         snode = dev.logical_id[1]
8577       else:
8578         snode = dev.logical_id[0]
8579
8580     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8581                                               instance.name, dev)
8582     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8583
8584     if dev.children:
8585       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8586                       for child in dev.children]
8587     else:
8588       dev_children = []
8589
8590     data = {
8591       "iv_name": dev.iv_name,
8592       "dev_type": dev.dev_type,
8593       "logical_id": dev.logical_id,
8594       "physical_id": dev.physical_id,
8595       "pstatus": dev_pstatus,
8596       "sstatus": dev_sstatus,
8597       "children": dev_children,
8598       "mode": dev.mode,
8599       "size": dev.size,
8600       }
8601
8602     return data
8603
8604   def Exec(self, feedback_fn):
8605     """Gather and return data"""
8606     result = {}
8607
8608     cluster = self.cfg.GetClusterInfo()
8609
8610     for instance in self.wanted_instances:
8611       if not self.op.static:
8612         remote_info = self.rpc.call_instance_info(instance.primary_node,
8613                                                   instance.name,
8614                                                   instance.hypervisor)
8615         remote_info.Raise("Error checking node %s" % instance.primary_node)
8616         remote_info = remote_info.payload
8617         if remote_info and "state" in remote_info:
8618           remote_state = "up"
8619         else:
8620           remote_state = "down"
8621       else:
8622         remote_state = None
8623       if instance.admin_up:
8624         config_state = "up"
8625       else:
8626         config_state = "down"
8627
8628       disks = [self._ComputeDiskStatus(instance, None, device)
8629                for device in instance.disks]
8630
8631       idict = {
8632         "name": instance.name,
8633         "config_state": config_state,
8634         "run_state": remote_state,
8635         "pnode": instance.primary_node,
8636         "snodes": instance.secondary_nodes,
8637         "os": instance.os,
8638         # this happens to be the same format used for hooks
8639         "nics": _NICListToTuple(self, instance.nics),
8640         "disk_template": instance.disk_template,
8641         "disks": disks,
8642         "hypervisor": instance.hypervisor,
8643         "network_port": instance.network_port,
8644         "hv_instance": instance.hvparams,
8645         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8646         "be_instance": instance.beparams,
8647         "be_actual": cluster.FillBE(instance),
8648         "os_instance": instance.osparams,
8649         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8650         "serial_no": instance.serial_no,
8651         "mtime": instance.mtime,
8652         "ctime": instance.ctime,
8653         "uuid": instance.uuid,
8654         }
8655
8656       result[instance.name] = idict
8657
8658     return result
8659
8660
8661 class LUSetInstanceParams(LogicalUnit):
8662   """Modifies an instances's parameters.
8663
8664   """
8665   HPATH = "instance-modify"
8666   HTYPE = constants.HTYPE_INSTANCE
8667   _OP_PARAMS = [
8668     _PInstanceName,
8669     ("nics", _EmptyList, _TList),
8670     ("disks", _EmptyList, _TList),
8671     ("beparams", _EmptyDict, _TDict),
8672     ("hvparams", _EmptyDict, _TDict),
8673     ("disk_template", None, _TMaybeString),
8674     ("remote_node", None, _TMaybeString),
8675     ("os_name", None, _TMaybeString),
8676     ("force_variant", False, _TBool),
8677     ("osparams", None, _TOr(_TDict, _TNone)),
8678     _PForce,
8679     ]
8680   REQ_BGL = False
8681
8682   def CheckArguments(self):
8683     if not (self.op.nics or self.op.disks or self.op.disk_template or
8684             self.op.hvparams or self.op.beparams or self.op.os_name):
8685       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8686
8687     if self.op.hvparams:
8688       _CheckGlobalHvParams(self.op.hvparams)
8689
8690     # Disk validation
8691     disk_addremove = 0
8692     for disk_op, disk_dict in self.op.disks:
8693       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8694       if disk_op == constants.DDM_REMOVE:
8695         disk_addremove += 1
8696         continue
8697       elif disk_op == constants.DDM_ADD:
8698         disk_addremove += 1
8699       else:
8700         if not isinstance(disk_op, int):
8701           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8702         if not isinstance(disk_dict, dict):
8703           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8704           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8705
8706       if disk_op == constants.DDM_ADD:
8707         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8708         if mode not in constants.DISK_ACCESS_SET:
8709           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8710                                      errors.ECODE_INVAL)
8711         size = disk_dict.get('size', None)
8712         if size is None:
8713           raise errors.OpPrereqError("Required disk parameter size missing",
8714                                      errors.ECODE_INVAL)
8715         try:
8716           size = int(size)
8717         except (TypeError, ValueError), err:
8718           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8719                                      str(err), errors.ECODE_INVAL)
8720         disk_dict['size'] = size
8721       else:
8722         # modification of disk
8723         if 'size' in disk_dict:
8724           raise errors.OpPrereqError("Disk size change not possible, use"
8725                                      " grow-disk", errors.ECODE_INVAL)
8726
8727     if disk_addremove > 1:
8728       raise errors.OpPrereqError("Only one disk add or remove operation"
8729                                  " supported at a time", errors.ECODE_INVAL)
8730
8731     if self.op.disks and self.op.disk_template is not None:
8732       raise errors.OpPrereqError("Disk template conversion and other disk"
8733                                  " changes not supported at the same time",
8734                                  errors.ECODE_INVAL)
8735
8736     if self.op.disk_template:
8737       _CheckDiskTemplate(self.op.disk_template)
8738       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8739           self.op.remote_node is None):
8740         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8741                                    " one requires specifying a secondary node",
8742                                    errors.ECODE_INVAL)
8743
8744     # NIC validation
8745     nic_addremove = 0
8746     for nic_op, nic_dict in self.op.nics:
8747       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8748       if nic_op == constants.DDM_REMOVE:
8749         nic_addremove += 1
8750         continue
8751       elif nic_op == constants.DDM_ADD:
8752         nic_addremove += 1
8753       else:
8754         if not isinstance(nic_op, int):
8755           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8756         if not isinstance(nic_dict, dict):
8757           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8758           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8759
8760       # nic_dict should be a dict
8761       nic_ip = nic_dict.get('ip', None)
8762       if nic_ip is not None:
8763         if nic_ip.lower() == constants.VALUE_NONE:
8764           nic_dict['ip'] = None
8765         else:
8766           if not netutils.IPAddress.IsValid(nic_ip):
8767             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8768                                        errors.ECODE_INVAL)
8769
8770       nic_bridge = nic_dict.get('bridge', None)
8771       nic_link = nic_dict.get('link', None)
8772       if nic_bridge and nic_link:
8773         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8774                                    " at the same time", errors.ECODE_INVAL)
8775       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8776         nic_dict['bridge'] = None
8777       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8778         nic_dict['link'] = None
8779
8780       if nic_op == constants.DDM_ADD:
8781         nic_mac = nic_dict.get('mac', None)
8782         if nic_mac is None:
8783           nic_dict['mac'] = constants.VALUE_AUTO
8784
8785       if 'mac' in nic_dict:
8786         nic_mac = nic_dict['mac']
8787         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8788           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8789
8790         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8791           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8792                                      " modifying an existing nic",
8793                                      errors.ECODE_INVAL)
8794
8795     if nic_addremove > 1:
8796       raise errors.OpPrereqError("Only one NIC add or remove operation"
8797                                  " supported at a time", errors.ECODE_INVAL)
8798
8799   def ExpandNames(self):
8800     self._ExpandAndLockInstance()
8801     self.needed_locks[locking.LEVEL_NODE] = []
8802     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8803
8804   def DeclareLocks(self, level):
8805     if level == locking.LEVEL_NODE:
8806       self._LockInstancesNodes()
8807       if self.op.disk_template and self.op.remote_node:
8808         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8809         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8810
8811   def BuildHooksEnv(self):
8812     """Build hooks env.
8813
8814     This runs on the master, primary and secondaries.
8815
8816     """
8817     args = dict()
8818     if constants.BE_MEMORY in self.be_new:
8819       args['memory'] = self.be_new[constants.BE_MEMORY]
8820     if constants.BE_VCPUS in self.be_new:
8821       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8822     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8823     # information at all.
8824     if self.op.nics:
8825       args['nics'] = []
8826       nic_override = dict(self.op.nics)
8827       for idx, nic in enumerate(self.instance.nics):
8828         if idx in nic_override:
8829           this_nic_override = nic_override[idx]
8830         else:
8831           this_nic_override = {}
8832         if 'ip' in this_nic_override:
8833           ip = this_nic_override['ip']
8834         else:
8835           ip = nic.ip
8836         if 'mac' in this_nic_override:
8837           mac = this_nic_override['mac']
8838         else:
8839           mac = nic.mac
8840         if idx in self.nic_pnew:
8841           nicparams = self.nic_pnew[idx]
8842         else:
8843           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8844         mode = nicparams[constants.NIC_MODE]
8845         link = nicparams[constants.NIC_LINK]
8846         args['nics'].append((ip, mac, mode, link))
8847       if constants.DDM_ADD in nic_override:
8848         ip = nic_override[constants.DDM_ADD].get('ip', None)
8849         mac = nic_override[constants.DDM_ADD]['mac']
8850         nicparams = self.nic_pnew[constants.DDM_ADD]
8851         mode = nicparams[constants.NIC_MODE]
8852         link = nicparams[constants.NIC_LINK]
8853         args['nics'].append((ip, mac, mode, link))
8854       elif constants.DDM_REMOVE in nic_override:
8855         del args['nics'][-1]
8856
8857     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8858     if self.op.disk_template:
8859       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8860     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8861     return env, nl, nl
8862
8863   def CheckPrereq(self):
8864     """Check prerequisites.
8865
8866     This only checks the instance list against the existing names.
8867
8868     """
8869     # checking the new params on the primary/secondary nodes
8870
8871     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8872     cluster = self.cluster = self.cfg.GetClusterInfo()
8873     assert self.instance is not None, \
8874       "Cannot retrieve locked instance %s" % self.op.instance_name
8875     pnode = instance.primary_node
8876     nodelist = list(instance.all_nodes)
8877
8878     # OS change
8879     if self.op.os_name and not self.op.force:
8880       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8881                       self.op.force_variant)
8882       instance_os = self.op.os_name
8883     else:
8884       instance_os = instance.os
8885
8886     if self.op.disk_template:
8887       if instance.disk_template == self.op.disk_template:
8888         raise errors.OpPrereqError("Instance already has disk template %s" %
8889                                    instance.disk_template, errors.ECODE_INVAL)
8890
8891       if (instance.disk_template,
8892           self.op.disk_template) not in self._DISK_CONVERSIONS:
8893         raise errors.OpPrereqError("Unsupported disk template conversion from"
8894                                    " %s to %s" % (instance.disk_template,
8895                                                   self.op.disk_template),
8896                                    errors.ECODE_INVAL)
8897       _CheckInstanceDown(self, instance, "cannot change disk template")
8898       if self.op.disk_template in constants.DTS_NET_MIRROR:
8899         if self.op.remote_node == pnode:
8900           raise errors.OpPrereqError("Given new secondary node %s is the same"
8901                                      " as the primary node of the instance" %
8902                                      self.op.remote_node, errors.ECODE_STATE)
8903         _CheckNodeOnline(self, self.op.remote_node)
8904         _CheckNodeNotDrained(self, self.op.remote_node)
8905         disks = [{"size": d.size} for d in instance.disks]
8906         required = _ComputeDiskSize(self.op.disk_template, disks)
8907         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8908
8909     # hvparams processing
8910     if self.op.hvparams:
8911       hv_type = instance.hypervisor
8912       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8913       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8914       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8915
8916       # local check
8917       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8918       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8919       self.hv_new = hv_new # the new actual values
8920       self.hv_inst = i_hvdict # the new dict (without defaults)
8921     else:
8922       self.hv_new = self.hv_inst = {}
8923
8924     # beparams processing
8925     if self.op.beparams:
8926       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8927                                    use_none=True)
8928       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8929       be_new = cluster.SimpleFillBE(i_bedict)
8930       self.be_new = be_new # the new actual values
8931       self.be_inst = i_bedict # the new dict (without defaults)
8932     else:
8933       self.be_new = self.be_inst = {}
8934
8935     # osparams processing
8936     if self.op.osparams:
8937       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8938       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8939       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8940       self.os_inst = i_osdict # the new dict (without defaults)
8941     else:
8942       self.os_new = self.os_inst = {}
8943
8944     self.warn = []
8945
8946     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8947       mem_check_list = [pnode]
8948       if be_new[constants.BE_AUTO_BALANCE]:
8949         # either we changed auto_balance to yes or it was from before
8950         mem_check_list.extend(instance.secondary_nodes)
8951       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8952                                                   instance.hypervisor)
8953       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8954                                          instance.hypervisor)
8955       pninfo = nodeinfo[pnode]
8956       msg = pninfo.fail_msg
8957       if msg:
8958         # Assume the primary node is unreachable and go ahead
8959         self.warn.append("Can't get info from primary node %s: %s" %
8960                          (pnode,  msg))
8961       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8962         self.warn.append("Node data from primary node %s doesn't contain"
8963                          " free memory information" % pnode)
8964       elif instance_info.fail_msg:
8965         self.warn.append("Can't get instance runtime information: %s" %
8966                         instance_info.fail_msg)
8967       else:
8968         if instance_info.payload:
8969           current_mem = int(instance_info.payload['memory'])
8970         else:
8971           # Assume instance not running
8972           # (there is a slight race condition here, but it's not very probable,
8973           # and we have no other way to check)
8974           current_mem = 0
8975         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8976                     pninfo.payload['memory_free'])
8977         if miss_mem > 0:
8978           raise errors.OpPrereqError("This change will prevent the instance"
8979                                      " from starting, due to %d MB of memory"
8980                                      " missing on its primary node" % miss_mem,
8981                                      errors.ECODE_NORES)
8982
8983       if be_new[constants.BE_AUTO_BALANCE]:
8984         for node, nres in nodeinfo.items():
8985           if node not in instance.secondary_nodes:
8986             continue
8987           msg = nres.fail_msg
8988           if msg:
8989             self.warn.append("Can't get info from secondary node %s: %s" %
8990                              (node, msg))
8991           elif not isinstance(nres.payload.get('memory_free', None), int):
8992             self.warn.append("Secondary node %s didn't return free"
8993                              " memory information" % node)
8994           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8995             self.warn.append("Not enough memory to failover instance to"
8996                              " secondary node %s" % node)
8997
8998     # NIC processing
8999     self.nic_pnew = {}
9000     self.nic_pinst = {}
9001     for nic_op, nic_dict in self.op.nics:
9002       if nic_op == constants.DDM_REMOVE:
9003         if not instance.nics:
9004           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9005                                      errors.ECODE_INVAL)
9006         continue
9007       if nic_op != constants.DDM_ADD:
9008         # an existing nic
9009         if not instance.nics:
9010           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9011                                      " no NICs" % nic_op,
9012                                      errors.ECODE_INVAL)
9013         if nic_op < 0 or nic_op >= len(instance.nics):
9014           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9015                                      " are 0 to %d" %
9016                                      (nic_op, len(instance.nics) - 1),
9017                                      errors.ECODE_INVAL)
9018         old_nic_params = instance.nics[nic_op].nicparams
9019         old_nic_ip = instance.nics[nic_op].ip
9020       else:
9021         old_nic_params = {}
9022         old_nic_ip = None
9023
9024       update_params_dict = dict([(key, nic_dict[key])
9025                                  for key in constants.NICS_PARAMETERS
9026                                  if key in nic_dict])
9027
9028       if 'bridge' in nic_dict:
9029         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9030
9031       new_nic_params = _GetUpdatedParams(old_nic_params,
9032                                          update_params_dict)
9033       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9034       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9035       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9036       self.nic_pinst[nic_op] = new_nic_params
9037       self.nic_pnew[nic_op] = new_filled_nic_params
9038       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9039
9040       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9041         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9042         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9043         if msg:
9044           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9045           if self.op.force:
9046             self.warn.append(msg)
9047           else:
9048             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9049       if new_nic_mode == constants.NIC_MODE_ROUTED:
9050         if 'ip' in nic_dict:
9051           nic_ip = nic_dict['ip']
9052         else:
9053           nic_ip = old_nic_ip
9054         if nic_ip is None:
9055           raise errors.OpPrereqError('Cannot set the nic ip to None'
9056                                      ' on a routed nic', errors.ECODE_INVAL)
9057       if 'mac' in nic_dict:
9058         nic_mac = nic_dict['mac']
9059         if nic_mac is None:
9060           raise errors.OpPrereqError('Cannot set the nic mac to None',
9061                                      errors.ECODE_INVAL)
9062         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9063           # otherwise generate the mac
9064           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9065         else:
9066           # or validate/reserve the current one
9067           try:
9068             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9069           except errors.ReservationError:
9070             raise errors.OpPrereqError("MAC address %s already in use"
9071                                        " in cluster" % nic_mac,
9072                                        errors.ECODE_NOTUNIQUE)
9073
9074     # DISK processing
9075     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9076       raise errors.OpPrereqError("Disk operations not supported for"
9077                                  " diskless instances",
9078                                  errors.ECODE_INVAL)
9079     for disk_op, _ in self.op.disks:
9080       if disk_op == constants.DDM_REMOVE:
9081         if len(instance.disks) == 1:
9082           raise errors.OpPrereqError("Cannot remove the last disk of"
9083                                      " an instance", errors.ECODE_INVAL)
9084         _CheckInstanceDown(self, instance, "cannot remove disks")
9085
9086       if (disk_op == constants.DDM_ADD and
9087           len(instance.nics) >= constants.MAX_DISKS):
9088         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9089                                    " add more" % constants.MAX_DISKS,
9090                                    errors.ECODE_STATE)
9091       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9092         # an existing disk
9093         if disk_op < 0 or disk_op >= len(instance.disks):
9094           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9095                                      " are 0 to %d" %
9096                                      (disk_op, len(instance.disks)),
9097                                      errors.ECODE_INVAL)
9098
9099     return
9100
9101   def _ConvertPlainToDrbd(self, feedback_fn):
9102     """Converts an instance from plain to drbd.
9103
9104     """
9105     feedback_fn("Converting template to drbd")
9106     instance = self.instance
9107     pnode = instance.primary_node
9108     snode = self.op.remote_node
9109
9110     # create a fake disk info for _GenerateDiskTemplate
9111     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9112     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9113                                       instance.name, pnode, [snode],
9114                                       disk_info, None, None, 0)
9115     info = _GetInstanceInfoText(instance)
9116     feedback_fn("Creating aditional volumes...")
9117     # first, create the missing data and meta devices
9118     for disk in new_disks:
9119       # unfortunately this is... not too nice
9120       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9121                             info, True)
9122       for child in disk.children:
9123         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9124     # at this stage, all new LVs have been created, we can rename the
9125     # old ones
9126     feedback_fn("Renaming original volumes...")
9127     rename_list = [(o, n.children[0].logical_id)
9128                    for (o, n) in zip(instance.disks, new_disks)]
9129     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9130     result.Raise("Failed to rename original LVs")
9131
9132     feedback_fn("Initializing DRBD devices...")
9133     # all child devices are in place, we can now create the DRBD devices
9134     for disk in new_disks:
9135       for node in [pnode, snode]:
9136         f_create = node == pnode
9137         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9138
9139     # at this point, the instance has been modified
9140     instance.disk_template = constants.DT_DRBD8
9141     instance.disks = new_disks
9142     self.cfg.Update(instance, feedback_fn)
9143
9144     # disks are created, waiting for sync
9145     disk_abort = not _WaitForSync(self, instance)
9146     if disk_abort:
9147       raise errors.OpExecError("There are some degraded disks for"
9148                                " this instance, please cleanup manually")
9149
9150   def _ConvertDrbdToPlain(self, feedback_fn):
9151     """Converts an instance from drbd to plain.
9152
9153     """
9154     instance = self.instance
9155     assert len(instance.secondary_nodes) == 1
9156     pnode = instance.primary_node
9157     snode = instance.secondary_nodes[0]
9158     feedback_fn("Converting template to plain")
9159
9160     old_disks = instance.disks
9161     new_disks = [d.children[0] for d in old_disks]
9162
9163     # copy over size and mode
9164     for parent, child in zip(old_disks, new_disks):
9165       child.size = parent.size
9166       child.mode = parent.mode
9167
9168     # update instance structure
9169     instance.disks = new_disks
9170     instance.disk_template = constants.DT_PLAIN
9171     self.cfg.Update(instance, feedback_fn)
9172
9173     feedback_fn("Removing volumes on the secondary node...")
9174     for disk in old_disks:
9175       self.cfg.SetDiskID(disk, snode)
9176       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9177       if msg:
9178         self.LogWarning("Could not remove block device %s on node %s,"
9179                         " continuing anyway: %s", disk.iv_name, snode, msg)
9180
9181     feedback_fn("Removing unneeded volumes on the primary node...")
9182     for idx, disk in enumerate(old_disks):
9183       meta = disk.children[1]
9184       self.cfg.SetDiskID(meta, pnode)
9185       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9186       if msg:
9187         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9188                         " continuing anyway: %s", idx, pnode, msg)
9189
9190
9191   def Exec(self, feedback_fn):
9192     """Modifies an instance.
9193
9194     All parameters take effect only at the next restart of the instance.
9195
9196     """
9197     # Process here the warnings from CheckPrereq, as we don't have a
9198     # feedback_fn there.
9199     for warn in self.warn:
9200       feedback_fn("WARNING: %s" % warn)
9201
9202     result = []
9203     instance = self.instance
9204     # disk changes
9205     for disk_op, disk_dict in self.op.disks:
9206       if disk_op == constants.DDM_REMOVE:
9207         # remove the last disk
9208         device = instance.disks.pop()
9209         device_idx = len(instance.disks)
9210         for node, disk in device.ComputeNodeTree(instance.primary_node):
9211           self.cfg.SetDiskID(disk, node)
9212           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9213           if msg:
9214             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9215                             " continuing anyway", device_idx, node, msg)
9216         result.append(("disk/%d" % device_idx, "remove"))
9217       elif disk_op == constants.DDM_ADD:
9218         # add a new disk
9219         if instance.disk_template == constants.DT_FILE:
9220           file_driver, file_path = instance.disks[0].logical_id
9221           file_path = os.path.dirname(file_path)
9222         else:
9223           file_driver = file_path = None
9224         disk_idx_base = len(instance.disks)
9225         new_disk = _GenerateDiskTemplate(self,
9226                                          instance.disk_template,
9227                                          instance.name, instance.primary_node,
9228                                          instance.secondary_nodes,
9229                                          [disk_dict],
9230                                          file_path,
9231                                          file_driver,
9232                                          disk_idx_base)[0]
9233         instance.disks.append(new_disk)
9234         info = _GetInstanceInfoText(instance)
9235
9236         logging.info("Creating volume %s for instance %s",
9237                      new_disk.iv_name, instance.name)
9238         # Note: this needs to be kept in sync with _CreateDisks
9239         #HARDCODE
9240         for node in instance.all_nodes:
9241           f_create = node == instance.primary_node
9242           try:
9243             _CreateBlockDev(self, node, instance, new_disk,
9244                             f_create, info, f_create)
9245           except errors.OpExecError, err:
9246             self.LogWarning("Failed to create volume %s (%s) on"
9247                             " node %s: %s",
9248                             new_disk.iv_name, new_disk, node, err)
9249         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9250                        (new_disk.size, new_disk.mode)))
9251       else:
9252         # change a given disk
9253         instance.disks[disk_op].mode = disk_dict['mode']
9254         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9255
9256     if self.op.disk_template:
9257       r_shut = _ShutdownInstanceDisks(self, instance)
9258       if not r_shut:
9259         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9260                                  " proceed with disk template conversion")
9261       mode = (instance.disk_template, self.op.disk_template)
9262       try:
9263         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9264       except:
9265         self.cfg.ReleaseDRBDMinors(instance.name)
9266         raise
9267       result.append(("disk_template", self.op.disk_template))
9268
9269     # NIC changes
9270     for nic_op, nic_dict in self.op.nics:
9271       if nic_op == constants.DDM_REMOVE:
9272         # remove the last nic
9273         del instance.nics[-1]
9274         result.append(("nic.%d" % len(instance.nics), "remove"))
9275       elif nic_op == constants.DDM_ADD:
9276         # mac and bridge should be set, by now
9277         mac = nic_dict['mac']
9278         ip = nic_dict.get('ip', None)
9279         nicparams = self.nic_pinst[constants.DDM_ADD]
9280         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9281         instance.nics.append(new_nic)
9282         result.append(("nic.%d" % (len(instance.nics) - 1),
9283                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9284                        (new_nic.mac, new_nic.ip,
9285                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9286                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9287                        )))
9288       else:
9289         for key in 'mac', 'ip':
9290           if key in nic_dict:
9291             setattr(instance.nics[nic_op], key, nic_dict[key])
9292         if nic_op in self.nic_pinst:
9293           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9294         for key, val in nic_dict.iteritems():
9295           result.append(("nic.%s/%d" % (key, nic_op), val))
9296
9297     # hvparams changes
9298     if self.op.hvparams:
9299       instance.hvparams = self.hv_inst
9300       for key, val in self.op.hvparams.iteritems():
9301         result.append(("hv/%s" % key, val))
9302
9303     # beparams changes
9304     if self.op.beparams:
9305       instance.beparams = self.be_inst
9306       for key, val in self.op.beparams.iteritems():
9307         result.append(("be/%s" % key, val))
9308
9309     # OS change
9310     if self.op.os_name:
9311       instance.os = self.op.os_name
9312
9313     # osparams changes
9314     if self.op.osparams:
9315       instance.osparams = self.os_inst
9316       for key, val in self.op.osparams.iteritems():
9317         result.append(("os/%s" % key, val))
9318
9319     self.cfg.Update(instance, feedback_fn)
9320
9321     return result
9322
9323   _DISK_CONVERSIONS = {
9324     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9325     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9326     }
9327
9328
9329 class LUQueryExports(NoHooksLU):
9330   """Query the exports list
9331
9332   """
9333   _OP_PARAMS = [
9334     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9335     ("use_locking", False, _TBool),
9336     ]
9337   REQ_BGL = False
9338
9339   def ExpandNames(self):
9340     self.needed_locks = {}
9341     self.share_locks[locking.LEVEL_NODE] = 1
9342     if not self.op.nodes:
9343       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9344     else:
9345       self.needed_locks[locking.LEVEL_NODE] = \
9346         _GetWantedNodes(self, self.op.nodes)
9347
9348   def Exec(self, feedback_fn):
9349     """Compute the list of all the exported system images.
9350
9351     @rtype: dict
9352     @return: a dictionary with the structure node->(export-list)
9353         where export-list is a list of the instances exported on
9354         that node.
9355
9356     """
9357     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9358     rpcresult = self.rpc.call_export_list(self.nodes)
9359     result = {}
9360     for node in rpcresult:
9361       if rpcresult[node].fail_msg:
9362         result[node] = False
9363       else:
9364         result[node] = rpcresult[node].payload
9365
9366     return result
9367
9368
9369 class LUPrepareExport(NoHooksLU):
9370   """Prepares an instance for an export and returns useful information.
9371
9372   """
9373   _OP_PARAMS = [
9374     _PInstanceName,
9375     ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9376     ]
9377   REQ_BGL = False
9378
9379   def ExpandNames(self):
9380     self._ExpandAndLockInstance()
9381
9382   def CheckPrereq(self):
9383     """Check prerequisites.
9384
9385     """
9386     instance_name = self.op.instance_name
9387
9388     self.instance = self.cfg.GetInstanceInfo(instance_name)
9389     assert self.instance is not None, \
9390           "Cannot retrieve locked instance %s" % self.op.instance_name
9391     _CheckNodeOnline(self, self.instance.primary_node)
9392
9393     self._cds = _GetClusterDomainSecret()
9394
9395   def Exec(self, feedback_fn):
9396     """Prepares an instance for an export.
9397
9398     """
9399     instance = self.instance
9400
9401     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9402       salt = utils.GenerateSecret(8)
9403
9404       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9405       result = self.rpc.call_x509_cert_create(instance.primary_node,
9406                                               constants.RIE_CERT_VALIDITY)
9407       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9408
9409       (name, cert_pem) = result.payload
9410
9411       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9412                                              cert_pem)
9413
9414       return {
9415         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9416         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9417                           salt),
9418         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9419         }
9420
9421     return None
9422
9423
9424 class LUExportInstance(LogicalUnit):
9425   """Export an instance to an image in the cluster.
9426
9427   """
9428   HPATH = "instance-export"
9429   HTYPE = constants.HTYPE_INSTANCE
9430   _OP_PARAMS = [
9431     _PInstanceName,
9432     ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9433     ("shutdown", True, _TBool),
9434     _PShutdownTimeout,
9435     ("remove_instance", False, _TBool),
9436     ("ignore_remove_failures", False, _TBool),
9437     ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9438     ("x509_key_name", None, _TOr(_TList, _TNone)),
9439     ("destination_x509_ca", None, _TMaybeString),
9440     ]
9441   REQ_BGL = False
9442
9443   def CheckArguments(self):
9444     """Check the arguments.
9445
9446     """
9447     self.x509_key_name = self.op.x509_key_name
9448     self.dest_x509_ca_pem = self.op.destination_x509_ca
9449
9450     if self.op.remove_instance and not self.op.shutdown:
9451       raise errors.OpPrereqError("Can not remove instance without shutting it"
9452                                  " down before")
9453
9454     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9455       if not self.x509_key_name:
9456         raise errors.OpPrereqError("Missing X509 key name for encryption",
9457                                    errors.ECODE_INVAL)
9458
9459       if not self.dest_x509_ca_pem:
9460         raise errors.OpPrereqError("Missing destination X509 CA",
9461                                    errors.ECODE_INVAL)
9462
9463   def ExpandNames(self):
9464     self._ExpandAndLockInstance()
9465
9466     # Lock all nodes for local exports
9467     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9468       # FIXME: lock only instance primary and destination node
9469       #
9470       # Sad but true, for now we have do lock all nodes, as we don't know where
9471       # the previous export might be, and in this LU we search for it and
9472       # remove it from its current node. In the future we could fix this by:
9473       #  - making a tasklet to search (share-lock all), then create the
9474       #    new one, then one to remove, after
9475       #  - removing the removal operation altogether
9476       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477
9478   def DeclareLocks(self, level):
9479     """Last minute lock declaration."""
9480     # All nodes are locked anyway, so nothing to do here.
9481
9482   def BuildHooksEnv(self):
9483     """Build hooks env.
9484
9485     This will run on the master, primary node and target node.
9486
9487     """
9488     env = {
9489       "EXPORT_MODE": self.op.mode,
9490       "EXPORT_NODE": self.op.target_node,
9491       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9492       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9493       # TODO: Generic function for boolean env variables
9494       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9495       }
9496
9497     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9498
9499     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9500
9501     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9502       nl.append(self.op.target_node)
9503
9504     return env, nl, nl
9505
9506   def CheckPrereq(self):
9507     """Check prerequisites.
9508
9509     This checks that the instance and node names are valid.
9510
9511     """
9512     instance_name = self.op.instance_name
9513
9514     self.instance = self.cfg.GetInstanceInfo(instance_name)
9515     assert self.instance is not None, \
9516           "Cannot retrieve locked instance %s" % self.op.instance_name
9517     _CheckNodeOnline(self, self.instance.primary_node)
9518
9519     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9520       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9521       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9522       assert self.dst_node is not None
9523
9524       _CheckNodeOnline(self, self.dst_node.name)
9525       _CheckNodeNotDrained(self, self.dst_node.name)
9526
9527       self._cds = None
9528       self.dest_disk_info = None
9529       self.dest_x509_ca = None
9530
9531     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9532       self.dst_node = None
9533
9534       if len(self.op.target_node) != len(self.instance.disks):
9535         raise errors.OpPrereqError(("Received destination information for %s"
9536                                     " disks, but instance %s has %s disks") %
9537                                    (len(self.op.target_node), instance_name,
9538                                     len(self.instance.disks)),
9539                                    errors.ECODE_INVAL)
9540
9541       cds = _GetClusterDomainSecret()
9542
9543       # Check X509 key name
9544       try:
9545         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9546       except (TypeError, ValueError), err:
9547         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9548
9549       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9550         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9551                                    errors.ECODE_INVAL)
9552
9553       # Load and verify CA
9554       try:
9555         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9556       except OpenSSL.crypto.Error, err:
9557         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9558                                    (err, ), errors.ECODE_INVAL)
9559
9560       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9561       if errcode is not None:
9562         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9563                                    (msg, ), errors.ECODE_INVAL)
9564
9565       self.dest_x509_ca = cert
9566
9567       # Verify target information
9568       disk_info = []
9569       for idx, disk_data in enumerate(self.op.target_node):
9570         try:
9571           (host, port, magic) = \
9572             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9573         except errors.GenericError, err:
9574           raise errors.OpPrereqError("Target info for disk %s: %s" %
9575                                      (idx, err), errors.ECODE_INVAL)
9576
9577         disk_info.append((host, port, magic))
9578
9579       assert len(disk_info) == len(self.op.target_node)
9580       self.dest_disk_info = disk_info
9581
9582     else:
9583       raise errors.ProgrammerError("Unhandled export mode %r" %
9584                                    self.op.mode)
9585
9586     # instance disk type verification
9587     # TODO: Implement export support for file-based disks
9588     for disk in self.instance.disks:
9589       if disk.dev_type == constants.LD_FILE:
9590         raise errors.OpPrereqError("Export not supported for instances with"
9591                                    " file-based disks", errors.ECODE_INVAL)
9592
9593   def _CleanupExports(self, feedback_fn):
9594     """Removes exports of current instance from all other nodes.
9595
9596     If an instance in a cluster with nodes A..D was exported to node C, its
9597     exports will be removed from the nodes A, B and D.
9598
9599     """
9600     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9601
9602     nodelist = self.cfg.GetNodeList()
9603     nodelist.remove(self.dst_node.name)
9604
9605     # on one-node clusters nodelist will be empty after the removal
9606     # if we proceed the backup would be removed because OpQueryExports
9607     # substitutes an empty list with the full cluster node list.
9608     iname = self.instance.name
9609     if nodelist:
9610       feedback_fn("Removing old exports for instance %s" % iname)
9611       exportlist = self.rpc.call_export_list(nodelist)
9612       for node in exportlist:
9613         if exportlist[node].fail_msg:
9614           continue
9615         if iname in exportlist[node].payload:
9616           msg = self.rpc.call_export_remove(node, iname).fail_msg
9617           if msg:
9618             self.LogWarning("Could not remove older export for instance %s"
9619                             " on node %s: %s", iname, node, msg)
9620
9621   def Exec(self, feedback_fn):
9622     """Export an instance to an image in the cluster.
9623
9624     """
9625     assert self.op.mode in constants.EXPORT_MODES
9626
9627     instance = self.instance
9628     src_node = instance.primary_node
9629
9630     if self.op.shutdown:
9631       # shutdown the instance, but not the disks
9632       feedback_fn("Shutting down instance %s" % instance.name)
9633       result = self.rpc.call_instance_shutdown(src_node, instance,
9634                                                self.op.shutdown_timeout)
9635       # TODO: Maybe ignore failures if ignore_remove_failures is set
9636       result.Raise("Could not shutdown instance %s on"
9637                    " node %s" % (instance.name, src_node))
9638
9639     # set the disks ID correctly since call_instance_start needs the
9640     # correct drbd minor to create the symlinks
9641     for disk in instance.disks:
9642       self.cfg.SetDiskID(disk, src_node)
9643
9644     activate_disks = (not instance.admin_up)
9645
9646     if activate_disks:
9647       # Activate the instance disks if we'exporting a stopped instance
9648       feedback_fn("Activating disks for %s" % instance.name)
9649       _StartInstanceDisks(self, instance, None)
9650
9651     try:
9652       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9653                                                      instance)
9654
9655       helper.CreateSnapshots()
9656       try:
9657         if (self.op.shutdown and instance.admin_up and
9658             not self.op.remove_instance):
9659           assert not activate_disks
9660           feedback_fn("Starting instance %s" % instance.name)
9661           result = self.rpc.call_instance_start(src_node, instance, None, None)
9662           msg = result.fail_msg
9663           if msg:
9664             feedback_fn("Failed to start instance: %s" % msg)
9665             _ShutdownInstanceDisks(self, instance)
9666             raise errors.OpExecError("Could not start instance: %s" % msg)
9667
9668         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9669           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9670         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9671           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9672           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9673
9674           (key_name, _, _) = self.x509_key_name
9675
9676           dest_ca_pem = \
9677             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9678                                             self.dest_x509_ca)
9679
9680           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9681                                                      key_name, dest_ca_pem,
9682                                                      timeouts)
9683       finally:
9684         helper.Cleanup()
9685
9686       # Check for backwards compatibility
9687       assert len(dresults) == len(instance.disks)
9688       assert compat.all(isinstance(i, bool) for i in dresults), \
9689              "Not all results are boolean: %r" % dresults
9690
9691     finally:
9692       if activate_disks:
9693         feedback_fn("Deactivating disks for %s" % instance.name)
9694         _ShutdownInstanceDisks(self, instance)
9695
9696     if not (compat.all(dresults) and fin_resu):
9697       failures = []
9698       if not fin_resu:
9699         failures.append("export finalization")
9700       if not compat.all(dresults):
9701         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9702                                if not dsk)
9703         failures.append("disk export: disk(s) %s" % fdsk)
9704
9705       raise errors.OpExecError("Export failed, errors in %s" %
9706                                utils.CommaJoin(failures))
9707
9708     # At this point, the export was successful, we can cleanup/finish
9709
9710     # Remove instance if requested
9711     if self.op.remove_instance:
9712       feedback_fn("Removing instance %s" % instance.name)
9713       _RemoveInstance(self, feedback_fn, instance,
9714                       self.op.ignore_remove_failures)
9715
9716     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9717       self._CleanupExports(feedback_fn)
9718
9719     return fin_resu, dresults
9720
9721
9722 class LURemoveExport(NoHooksLU):
9723   """Remove exports related to the named instance.
9724
9725   """
9726   _OP_PARAMS = [
9727     _PInstanceName,
9728     ]
9729   REQ_BGL = False
9730
9731   def ExpandNames(self):
9732     self.needed_locks = {}
9733     # We need all nodes to be locked in order for RemoveExport to work, but we
9734     # don't need to lock the instance itself, as nothing will happen to it (and
9735     # we can remove exports also for a removed instance)
9736     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9737
9738   def Exec(self, feedback_fn):
9739     """Remove any export.
9740
9741     """
9742     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9743     # If the instance was not found we'll try with the name that was passed in.
9744     # This will only work if it was an FQDN, though.
9745     fqdn_warn = False
9746     if not instance_name:
9747       fqdn_warn = True
9748       instance_name = self.op.instance_name
9749
9750     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9751     exportlist = self.rpc.call_export_list(locked_nodes)
9752     found = False
9753     for node in exportlist:
9754       msg = exportlist[node].fail_msg
9755       if msg:
9756         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9757         continue
9758       if instance_name in exportlist[node].payload:
9759         found = True
9760         result = self.rpc.call_export_remove(node, instance_name)
9761         msg = result.fail_msg
9762         if msg:
9763           logging.error("Could not remove export for instance %s"
9764                         " on node %s: %s", instance_name, node, msg)
9765
9766     if fqdn_warn and not found:
9767       feedback_fn("Export not found. If trying to remove an export belonging"
9768                   " to a deleted instance please use its Fully Qualified"
9769                   " Domain Name.")
9770
9771
9772 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9773   """Generic tags LU.
9774
9775   This is an abstract class which is the parent of all the other tags LUs.
9776
9777   """
9778
9779   def ExpandNames(self):
9780     self.needed_locks = {}
9781     if self.op.kind == constants.TAG_NODE:
9782       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9783       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9784     elif self.op.kind == constants.TAG_INSTANCE:
9785       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9786       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9787
9788   def CheckPrereq(self):
9789     """Check prerequisites.
9790
9791     """
9792     if self.op.kind == constants.TAG_CLUSTER:
9793       self.target = self.cfg.GetClusterInfo()
9794     elif self.op.kind == constants.TAG_NODE:
9795       self.target = self.cfg.GetNodeInfo(self.op.name)
9796     elif self.op.kind == constants.TAG_INSTANCE:
9797       self.target = self.cfg.GetInstanceInfo(self.op.name)
9798     else:
9799       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9800                                  str(self.op.kind), errors.ECODE_INVAL)
9801
9802
9803 class LUGetTags(TagsLU):
9804   """Returns the tags of a given object.
9805
9806   """
9807   _OP_PARAMS = [
9808     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9809     # Name is only meaningful for nodes and instances
9810     ("name", _NoDefault, _TMaybeString),
9811     ]
9812   REQ_BGL = False
9813
9814   def Exec(self, feedback_fn):
9815     """Returns the tag list.
9816
9817     """
9818     return list(self.target.GetTags())
9819
9820
9821 class LUSearchTags(NoHooksLU):
9822   """Searches the tags for a given pattern.
9823
9824   """
9825   _OP_PARAMS = [
9826     ("pattern", _NoDefault, _TNonEmptyString),
9827     ]
9828   REQ_BGL = False
9829
9830   def ExpandNames(self):
9831     self.needed_locks = {}
9832
9833   def CheckPrereq(self):
9834     """Check prerequisites.
9835
9836     This checks the pattern passed for validity by compiling it.
9837
9838     """
9839     try:
9840       self.re = re.compile(self.op.pattern)
9841     except re.error, err:
9842       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9843                                  (self.op.pattern, err), errors.ECODE_INVAL)
9844
9845   def Exec(self, feedback_fn):
9846     """Returns the tag list.
9847
9848     """
9849     cfg = self.cfg
9850     tgts = [("/cluster", cfg.GetClusterInfo())]
9851     ilist = cfg.GetAllInstancesInfo().values()
9852     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9853     nlist = cfg.GetAllNodesInfo().values()
9854     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9855     results = []
9856     for path, target in tgts:
9857       for tag in target.GetTags():
9858         if self.re.search(tag):
9859           results.append((path, tag))
9860     return results
9861
9862
9863 class LUAddTags(TagsLU):
9864   """Sets a tag on a given object.
9865
9866   """
9867   _OP_PARAMS = [
9868     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9869     # Name is only meaningful for nodes and instances
9870     ("name", _NoDefault, _TMaybeString),
9871     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9872     ]
9873   REQ_BGL = False
9874
9875   def CheckPrereq(self):
9876     """Check prerequisites.
9877
9878     This checks the type and length of the tag name and value.
9879
9880     """
9881     TagsLU.CheckPrereq(self)
9882     for tag in self.op.tags:
9883       objects.TaggableObject.ValidateTag(tag)
9884
9885   def Exec(self, feedback_fn):
9886     """Sets the tag.
9887
9888     """
9889     try:
9890       for tag in self.op.tags:
9891         self.target.AddTag(tag)
9892     except errors.TagError, err:
9893       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9894     self.cfg.Update(self.target, feedback_fn)
9895
9896
9897 class LUDelTags(TagsLU):
9898   """Delete a list of tags from a given object.
9899
9900   """
9901   _OP_PARAMS = [
9902     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9903     # Name is only meaningful for nodes and instances
9904     ("name", _NoDefault, _TMaybeString),
9905     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9906     ]
9907   REQ_BGL = False
9908
9909   def CheckPrereq(self):
9910     """Check prerequisites.
9911
9912     This checks that we have the given tag.
9913
9914     """
9915     TagsLU.CheckPrereq(self)
9916     for tag in self.op.tags:
9917       objects.TaggableObject.ValidateTag(tag)
9918     del_tags = frozenset(self.op.tags)
9919     cur_tags = self.target.GetTags()
9920     if not del_tags <= cur_tags:
9921       diff_tags = del_tags - cur_tags
9922       diff_names = ["'%s'" % tag for tag in diff_tags]
9923       diff_names.sort()
9924       raise errors.OpPrereqError("Tag(s) %s not found" %
9925                                  (",".join(diff_names)), errors.ECODE_NOENT)
9926
9927   def Exec(self, feedback_fn):
9928     """Remove the tag from the object.
9929
9930     """
9931     for tag in self.op.tags:
9932       self.target.RemoveTag(tag)
9933     self.cfg.Update(self.target, feedback_fn)
9934
9935
9936 class LUTestDelay(NoHooksLU):
9937   """Sleep for a specified amount of time.
9938
9939   This LU sleeps on the master and/or nodes for a specified amount of
9940   time.
9941
9942   """
9943   _OP_PARAMS = [
9944     ("duration", _NoDefault, _TFloat),
9945     ("on_master", True, _TBool),
9946     ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9947     ("repeat", 0, _TPositiveInt)
9948     ]
9949   REQ_BGL = False
9950
9951   def ExpandNames(self):
9952     """Expand names and set required locks.
9953
9954     This expands the node list, if any.
9955
9956     """
9957     self.needed_locks = {}
9958     if self.op.on_nodes:
9959       # _GetWantedNodes can be used here, but is not always appropriate to use
9960       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9961       # more information.
9962       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9963       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9964
9965   def _TestDelay(self):
9966     """Do the actual sleep.
9967
9968     """
9969     if self.op.on_master:
9970       if not utils.TestDelay(self.op.duration):
9971         raise errors.OpExecError("Error during master delay test")
9972     if self.op.on_nodes:
9973       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9974       for node, node_result in result.items():
9975         node_result.Raise("Failure during rpc call to node %s" % node)
9976
9977   def Exec(self, feedback_fn):
9978     """Execute the test delay opcode, with the wanted repetitions.
9979
9980     """
9981     if self.op.repeat == 0:
9982       self._TestDelay()
9983     else:
9984       top_value = self.op.repeat - 1
9985       for i in range(self.op.repeat):
9986         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9987         self._TestDelay()
9988
9989
9990 class LUTestJobqueue(NoHooksLU):
9991   """Utility LU to test some aspects of the job queue.
9992
9993   """
9994   _OP_PARAMS = [
9995     ("notify_waitlock", False, _TBool),
9996     ("notify_exec", False, _TBool),
9997     ("log_messages", _EmptyList, _TListOf(_TString)),
9998     ("fail", False, _TBool),
9999     ]
10000   REQ_BGL = False
10001
10002   # Must be lower than default timeout for WaitForJobChange to see whether it
10003   # notices changed jobs
10004   _CLIENT_CONNECT_TIMEOUT = 20.0
10005   _CLIENT_CONFIRM_TIMEOUT = 60.0
10006
10007   @classmethod
10008   def _NotifyUsingSocket(cls, cb, errcls):
10009     """Opens a Unix socket and waits for another program to connect.
10010
10011     @type cb: callable
10012     @param cb: Callback to send socket name to client
10013     @type errcls: class
10014     @param errcls: Exception class to use for errors
10015
10016     """
10017     # Using a temporary directory as there's no easy way to create temporary
10018     # sockets without writing a custom loop around tempfile.mktemp and
10019     # socket.bind
10020     tmpdir = tempfile.mkdtemp()
10021     try:
10022       tmpsock = utils.PathJoin(tmpdir, "sock")
10023
10024       logging.debug("Creating temporary socket at %s", tmpsock)
10025       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10026       try:
10027         sock.bind(tmpsock)
10028         sock.listen(1)
10029
10030         # Send details to client
10031         cb(tmpsock)
10032
10033         # Wait for client to connect before continuing
10034         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10035         try:
10036           (conn, _) = sock.accept()
10037         except socket.error, err:
10038           raise errcls("Client didn't connect in time (%s)" % err)
10039       finally:
10040         sock.close()
10041     finally:
10042       # Remove as soon as client is connected
10043       shutil.rmtree(tmpdir)
10044
10045     # Wait for client to close
10046     try:
10047       try:
10048         # pylint: disable-msg=E1101
10049         # Instance of '_socketobject' has no ... member
10050         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10051         conn.recv(1)
10052       except socket.error, err:
10053         raise errcls("Client failed to confirm notification (%s)" % err)
10054     finally:
10055       conn.close()
10056
10057   def _SendNotification(self, test, arg, sockname):
10058     """Sends a notification to the client.
10059
10060     @type test: string
10061     @param test: Test name
10062     @param arg: Test argument (depends on test)
10063     @type sockname: string
10064     @param sockname: Socket path
10065
10066     """
10067     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10068
10069   def _Notify(self, prereq, test, arg):
10070     """Notifies the client of a test.
10071
10072     @type prereq: bool
10073     @param prereq: Whether this is a prereq-phase test
10074     @type test: string
10075     @param test: Test name
10076     @param arg: Test argument (depends on test)
10077
10078     """
10079     if prereq:
10080       errcls = errors.OpPrereqError
10081     else:
10082       errcls = errors.OpExecError
10083
10084     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10085                                                   test, arg),
10086                                    errcls)
10087
10088   def CheckArguments(self):
10089     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10090     self.expandnames_calls = 0
10091
10092   def ExpandNames(self):
10093     checkargs_calls = getattr(self, "checkargs_calls", 0)
10094     if checkargs_calls < 1:
10095       raise errors.ProgrammerError("CheckArguments was not called")
10096
10097     self.expandnames_calls += 1
10098
10099     if self.op.notify_waitlock:
10100       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10101
10102     self.LogInfo("Expanding names")
10103
10104     # Get lock on master node (just to get a lock, not for a particular reason)
10105     self.needed_locks = {
10106       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10107       }
10108
10109   def Exec(self, feedback_fn):
10110     if self.expandnames_calls < 1:
10111       raise errors.ProgrammerError("ExpandNames was not called")
10112
10113     if self.op.notify_exec:
10114       self._Notify(False, constants.JQT_EXEC, None)
10115
10116     self.LogInfo("Executing")
10117
10118     if self.op.log_messages:
10119       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10120       for idx, msg in enumerate(self.op.log_messages):
10121         self.LogInfo("Sending log message %s", idx + 1)
10122         feedback_fn(constants.JQT_MSGPREFIX + msg)
10123         # Report how many test messages have been sent
10124         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10125
10126     if self.op.fail:
10127       raise errors.OpExecError("Opcode failure was requested")
10128
10129     return True
10130
10131
10132 class IAllocator(object):
10133   """IAllocator framework.
10134
10135   An IAllocator instance has three sets of attributes:
10136     - cfg that is needed to query the cluster
10137     - input data (all members of the _KEYS class attribute are required)
10138     - four buffer attributes (in|out_data|text), that represent the
10139       input (to the external script) in text and data structure format,
10140       and the output from it, again in two formats
10141     - the result variables from the script (success, info, nodes) for
10142       easy usage
10143
10144   """
10145   # pylint: disable-msg=R0902
10146   # lots of instance attributes
10147   _ALLO_KEYS = [
10148     "name", "mem_size", "disks", "disk_template",
10149     "os", "tags", "nics", "vcpus", "hypervisor",
10150     ]
10151   _RELO_KEYS = [
10152     "name", "relocate_from",
10153     ]
10154   _EVAC_KEYS = [
10155     "evac_nodes",
10156     ]
10157
10158   def __init__(self, cfg, rpc, mode, **kwargs):
10159     self.cfg = cfg
10160     self.rpc = rpc
10161     # init buffer variables
10162     self.in_text = self.out_text = self.in_data = self.out_data = None
10163     # init all input fields so that pylint is happy
10164     self.mode = mode
10165     self.mem_size = self.disks = self.disk_template = None
10166     self.os = self.tags = self.nics = self.vcpus = None
10167     self.hypervisor = None
10168     self.relocate_from = None
10169     self.name = None
10170     self.evac_nodes = None
10171     # computed fields
10172     self.required_nodes = None
10173     # init result fields
10174     self.success = self.info = self.result = None
10175     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10176       keyset = self._ALLO_KEYS
10177       fn = self._AddNewInstance
10178     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10179       keyset = self._RELO_KEYS
10180       fn = self._AddRelocateInstance
10181     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10182       keyset = self._EVAC_KEYS
10183       fn = self._AddEvacuateNodes
10184     else:
10185       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10186                                    " IAllocator" % self.mode)
10187     for key in kwargs:
10188       if key not in keyset:
10189         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10190                                      " IAllocator" % key)
10191       setattr(self, key, kwargs[key])
10192
10193     for key in keyset:
10194       if key not in kwargs:
10195         raise errors.ProgrammerError("Missing input parameter '%s' to"
10196                                      " IAllocator" % key)
10197     self._BuildInputData(fn)
10198
10199   def _ComputeClusterData(self):
10200     """Compute the generic allocator input data.
10201
10202     This is the data that is independent of the actual operation.
10203
10204     """
10205     cfg = self.cfg
10206     cluster_info = cfg.GetClusterInfo()
10207     # cluster data
10208     data = {
10209       "version": constants.IALLOCATOR_VERSION,
10210       "cluster_name": cfg.GetClusterName(),
10211       "cluster_tags": list(cluster_info.GetTags()),
10212       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10213       # we don't have job IDs
10214       }
10215     iinfo = cfg.GetAllInstancesInfo().values()
10216     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10217
10218     # node data
10219     node_results = {}
10220     node_list = cfg.GetNodeList()
10221
10222     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10223       hypervisor_name = self.hypervisor
10224     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10225       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10226     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10227       hypervisor_name = cluster_info.enabled_hypervisors[0]
10228
10229     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10230                                         hypervisor_name)
10231     node_iinfo = \
10232       self.rpc.call_all_instances_info(node_list,
10233                                        cluster_info.enabled_hypervisors)
10234     for nname, nresult in node_data.items():
10235       # first fill in static (config-based) values
10236       ninfo = cfg.GetNodeInfo(nname)
10237       pnr = {
10238         "tags": list(ninfo.GetTags()),
10239         "primary_ip": ninfo.primary_ip,
10240         "secondary_ip": ninfo.secondary_ip,
10241         "offline": ninfo.offline,
10242         "drained": ninfo.drained,
10243         "master_candidate": ninfo.master_candidate,
10244         }
10245
10246       if not (ninfo.offline or ninfo.drained):
10247         nresult.Raise("Can't get data for node %s" % nname)
10248         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10249                                 nname)
10250         remote_info = nresult.payload
10251
10252         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10253                      'vg_size', 'vg_free', 'cpu_total']:
10254           if attr not in remote_info:
10255             raise errors.OpExecError("Node '%s' didn't return attribute"
10256                                      " '%s'" % (nname, attr))
10257           if not isinstance(remote_info[attr], int):
10258             raise errors.OpExecError("Node '%s' returned invalid value"
10259                                      " for '%s': %s" %
10260                                      (nname, attr, remote_info[attr]))
10261         # compute memory used by primary instances
10262         i_p_mem = i_p_up_mem = 0
10263         for iinfo, beinfo in i_list:
10264           if iinfo.primary_node == nname:
10265             i_p_mem += beinfo[constants.BE_MEMORY]
10266             if iinfo.name not in node_iinfo[nname].payload:
10267               i_used_mem = 0
10268             else:
10269               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10270             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10271             remote_info['memory_free'] -= max(0, i_mem_diff)
10272
10273             if iinfo.admin_up:
10274               i_p_up_mem += beinfo[constants.BE_MEMORY]
10275
10276         # compute memory used by instances
10277         pnr_dyn = {
10278           "total_memory": remote_info['memory_total'],
10279           "reserved_memory": remote_info['memory_dom0'],
10280           "free_memory": remote_info['memory_free'],
10281           "total_disk": remote_info['vg_size'],
10282           "free_disk": remote_info['vg_free'],
10283           "total_cpus": remote_info['cpu_total'],
10284           "i_pri_memory": i_p_mem,
10285           "i_pri_up_memory": i_p_up_mem,
10286           }
10287         pnr.update(pnr_dyn)
10288
10289       node_results[nname] = pnr
10290     data["nodes"] = node_results
10291
10292     # instance data
10293     instance_data = {}
10294     for iinfo, beinfo in i_list:
10295       nic_data = []
10296       for nic in iinfo.nics:
10297         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10298         nic_dict = {"mac": nic.mac,
10299                     "ip": nic.ip,
10300                     "mode": filled_params[constants.NIC_MODE],
10301                     "link": filled_params[constants.NIC_LINK],
10302                    }
10303         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10304           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10305         nic_data.append(nic_dict)
10306       pir = {
10307         "tags": list(iinfo.GetTags()),
10308         "admin_up": iinfo.admin_up,
10309         "vcpus": beinfo[constants.BE_VCPUS],
10310         "memory": beinfo[constants.BE_MEMORY],
10311         "os": iinfo.os,
10312         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10313         "nics": nic_data,
10314         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10315         "disk_template": iinfo.disk_template,
10316         "hypervisor": iinfo.hypervisor,
10317         }
10318       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10319                                                  pir["disks"])
10320       instance_data[iinfo.name] = pir
10321
10322     data["instances"] = instance_data
10323
10324     self.in_data = data
10325
10326   def _AddNewInstance(self):
10327     """Add new instance data to allocator structure.
10328
10329     This in combination with _AllocatorGetClusterData will create the
10330     correct structure needed as input for the allocator.
10331
10332     The checks for the completeness of the opcode must have already been
10333     done.
10334
10335     """
10336     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10337
10338     if self.disk_template in constants.DTS_NET_MIRROR:
10339       self.required_nodes = 2
10340     else:
10341       self.required_nodes = 1
10342     request = {
10343       "name": self.name,
10344       "disk_template": self.disk_template,
10345       "tags": self.tags,
10346       "os": self.os,
10347       "vcpus": self.vcpus,
10348       "memory": self.mem_size,
10349       "disks": self.disks,
10350       "disk_space_total": disk_space,
10351       "nics": self.nics,
10352       "required_nodes": self.required_nodes,
10353       }
10354     return request
10355
10356   def _AddRelocateInstance(self):
10357     """Add relocate instance data to allocator structure.
10358
10359     This in combination with _IAllocatorGetClusterData will create the
10360     correct structure needed as input for the allocator.
10361
10362     The checks for the completeness of the opcode must have already been
10363     done.
10364
10365     """
10366     instance = self.cfg.GetInstanceInfo(self.name)
10367     if instance is None:
10368       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10369                                    " IAllocator" % self.name)
10370
10371     if instance.disk_template not in constants.DTS_NET_MIRROR:
10372       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10373                                  errors.ECODE_INVAL)
10374
10375     if len(instance.secondary_nodes) != 1:
10376       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10377                                  errors.ECODE_STATE)
10378
10379     self.required_nodes = 1
10380     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10381     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10382
10383     request = {
10384       "name": self.name,
10385       "disk_space_total": disk_space,
10386       "required_nodes": self.required_nodes,
10387       "relocate_from": self.relocate_from,
10388       }
10389     return request
10390
10391   def _AddEvacuateNodes(self):
10392     """Add evacuate nodes data to allocator structure.
10393
10394     """
10395     request = {
10396       "evac_nodes": self.evac_nodes
10397       }
10398     return request
10399
10400   def _BuildInputData(self, fn):
10401     """Build input data structures.
10402
10403     """
10404     self._ComputeClusterData()
10405
10406     request = fn()
10407     request["type"] = self.mode
10408     self.in_data["request"] = request
10409
10410     self.in_text = serializer.Dump(self.in_data)
10411
10412   def Run(self, name, validate=True, call_fn=None):
10413     """Run an instance allocator and return the results.
10414
10415     """
10416     if call_fn is None:
10417       call_fn = self.rpc.call_iallocator_runner
10418
10419     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10420     result.Raise("Failure while running the iallocator script")
10421
10422     self.out_text = result.payload
10423     if validate:
10424       self._ValidateResult()
10425
10426   def _ValidateResult(self):
10427     """Process the allocator results.
10428
10429     This will process and if successful save the result in
10430     self.out_data and the other parameters.
10431
10432     """
10433     try:
10434       rdict = serializer.Load(self.out_text)
10435     except Exception, err:
10436       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10437
10438     if not isinstance(rdict, dict):
10439       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10440
10441     # TODO: remove backwards compatiblity in later versions
10442     if "nodes" in rdict and "result" not in rdict:
10443       rdict["result"] = rdict["nodes"]
10444       del rdict["nodes"]
10445
10446     for key in "success", "info", "result":
10447       if key not in rdict:
10448         raise errors.OpExecError("Can't parse iallocator results:"
10449                                  " missing key '%s'" % key)
10450       setattr(self, key, rdict[key])
10451
10452     if not isinstance(rdict["result"], list):
10453       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10454                                " is not a list")
10455     self.out_data = rdict
10456
10457
10458 class LUTestAllocator(NoHooksLU):
10459   """Run allocator tests.
10460
10461   This LU runs the allocator tests
10462
10463   """
10464   _OP_PARAMS = [
10465     ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10466     ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10467     ("name", _NoDefault, _TNonEmptyString),
10468     ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10469       _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10470                _TOr(_TNone, _TNonEmptyString))))),
10471     ("disks", _NoDefault, _TOr(_TNone, _TList)),
10472     ("hypervisor", None, _TMaybeString),
10473     ("allocator", None, _TMaybeString),
10474     ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10475     ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10476     ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10477     ("os", None, _TMaybeString),
10478     ("disk_template", None, _TMaybeString),
10479     ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10480     ]
10481
10482   def CheckPrereq(self):
10483     """Check prerequisites.
10484
10485     This checks the opcode parameters depending on the director and mode test.
10486
10487     """
10488     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10489       for attr in ["mem_size", "disks", "disk_template",
10490                    "os", "tags", "nics", "vcpus"]:
10491         if not hasattr(self.op, attr):
10492           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10493                                      attr, errors.ECODE_INVAL)
10494       iname = self.cfg.ExpandInstanceName(self.op.name)
10495       if iname is not None:
10496         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10497                                    iname, errors.ECODE_EXISTS)
10498       if not isinstance(self.op.nics, list):
10499         raise errors.OpPrereqError("Invalid parameter 'nics'",
10500                                    errors.ECODE_INVAL)
10501       if not isinstance(self.op.disks, list):
10502         raise errors.OpPrereqError("Invalid parameter 'disks'",
10503                                    errors.ECODE_INVAL)
10504       for row in self.op.disks:
10505         if (not isinstance(row, dict) or
10506             "size" not in row or
10507             not isinstance(row["size"], int) or
10508             "mode" not in row or
10509             row["mode"] not in ['r', 'w']):
10510           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10511                                      " parameter", errors.ECODE_INVAL)
10512       if self.op.hypervisor is None:
10513         self.op.hypervisor = self.cfg.GetHypervisorType()
10514     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10515       fname = _ExpandInstanceName(self.cfg, self.op.name)
10516       self.op.name = fname
10517       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10518     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10519       if not hasattr(self.op, "evac_nodes"):
10520         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10521                                    " opcode input", errors.ECODE_INVAL)
10522     else:
10523       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10524                                  self.op.mode, errors.ECODE_INVAL)
10525
10526     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10527       if self.op.allocator is None:
10528         raise errors.OpPrereqError("Missing allocator name",
10529                                    errors.ECODE_INVAL)
10530     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10531       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10532                                  self.op.direction, errors.ECODE_INVAL)
10533
10534   def Exec(self, feedback_fn):
10535     """Run the allocator test.
10536
10537     """
10538     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10539       ial = IAllocator(self.cfg, self.rpc,
10540                        mode=self.op.mode,
10541                        name=self.op.name,
10542                        mem_size=self.op.mem_size,
10543                        disks=self.op.disks,
10544                        disk_template=self.op.disk_template,
10545                        os=self.op.os,
10546                        tags=self.op.tags,
10547                        nics=self.op.nics,
10548                        vcpus=self.op.vcpus,
10549                        hypervisor=self.op.hypervisor,
10550                        )
10551     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10552       ial = IAllocator(self.cfg, self.rpc,
10553                        mode=self.op.mode,
10554                        name=self.op.name,
10555                        relocate_from=list(self.relocate_from),
10556                        )
10557     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10558       ial = IAllocator(self.cfg, self.rpc,
10559                        mode=self.op.mode,
10560                        evac_nodes=self.op.evac_nodes)
10561     else:
10562       raise errors.ProgrammerError("Uncatched mode %s in"
10563                                    " LUTestAllocator.Exec", self.op.mode)
10564
10565     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10566       result = ial.in_text
10567     else:
10568       ial.Run(self.op.allocator, validate=False)
10569       result = ial.out_text
10570     return result