4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check if the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an OR operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
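# Illustrative sketch (not part of the original module): the mini type-system
# helpers above compose into richer checks, for example:
#
#   _TListOf(_TNonEmptyString)(["node1.example.com", "node2.example.com"])
#       # -> True
#   _TListOf(_TNonEmptyString)(["node1.example.com", ""])
#       # -> False (the empty string fails _TNonEmptyString)
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"count": 3})
#       # -> True
#   _TOr(_TNone, _TElemOf(["live", "non-live"]))("offline")
#       # -> False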
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, their default values
275 they should get if not already defined, and types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
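# Illustrative sketch (not from the original module): a concrete LU would
# declare its parameters as (name, default, check) tuples, e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,                      # required, non-empty string
#     ("ignore_failures", False, _TBool),  # optional, defaults to False
#   ]
#
# The loop above then fills missing opcode attributes with their declared
# defaults and rejects values that fail the associated check function.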
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods no longer need to worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not be prefixed with 'GANETI_', as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 If there are no nodes to return, an empty list (and not None) should be used.
496 Note that if the HPATH for a LU class is None, this function will
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the unused-argument and the
522 # 'method could be a function' warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instances' nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called from DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we've really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklet.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose names are to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
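# Illustrative sketch (assumed values, not from the original code): with
# old_params = {"mem": 512, "vcpus": 2} and
# update_dict = {"mem": constants.VALUE_DEFAULT, "vcpus": 4},
# _GetUpdatedParams returns {"vcpus": 4} when use_default is True: the
# "mem" key is removed (i.e. reset to the cluster default) while "vcpus"
# is overridden. With use_none=True, an explicit None value is treated the
# same way as constants.VALUE_DEFAULT.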
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
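# Illustrative sketch (hypothetical field names): with
# static = utils.FieldSet("name", "pnode") and
# dynamic = utils.FieldSet("oper_state"), a selection of
# ["name", "bogus"] would raise OpPrereqError mentioning "bogus",
# while ["name", "oper_state"] passes the check unchanged.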
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node does not support the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instances."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance-related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
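# Illustrative sketch (hypothetical instance, not from the original code):
# for an instance "web1" with one bridged NIC and one 10240 MB disk, the
# resulting environment would contain keys such as:
#
#   INSTANCE_NAME=web1
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=<the NIC's link>
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240
#   INSTANCE_BE_<param>=<value> and INSTANCE_HV_<param>=<value>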
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance-related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max by one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
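# Worked example (assumed numbers): with candidate_pool_size = 10 and a
# three-node cluster where mc_now = 3 and mc_should = 3, adding this node
# bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4, the new node decides
# to promote itself to master candidate.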
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the bridges needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the bridges needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the OS variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
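# Illustrative sketch (hypothetical OS names): for an OS that declares
# supported_variants, the user-supplied name must carry a variant suffix,
# e.g. "debootstrap+default"; a bare "debootstrap" raises "OS name must
# include a variant", and "debootstrap+unknown" raises "Unsupported OS
# variant" when "unknown" is not listed in supported_variants.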
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
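# Illustrative sketch of the three outcomes (assumed opcode slot values):
#
#   iallocator="hail", node="node1"  -> OpPrereqError (both specified)
#   iallocator=None,   node=None     -> the opcode's iallocator slot is
#                                       filled with the cluster default, or
#                                       OpPrereqError if none is configured
#   exactly one of the two given     -> accepted unchanged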
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
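# Illustrative sketch (hypothetical values): with error_codes enabled, the
# reported line looks like
#   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while without error codes the same condition is reported as
#   - ERROR: node node1.example.com: unable to check volume groups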
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # WARN-level results do not mark the operation as failed
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
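# Worked example (assumed skew limit): with constants.NODE_MAX_CLOCK_SKEW
# at, say, 150 seconds, a node whose reported time is 10 minutes behind the
# start of the RPC call triggers ENODETIME with a divergence of roughly
# 600s, while a node within the allowed window around the call is accepted.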
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node LVM data.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node network connectivity.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as a
1715 # secondary has enough memory to host all instances it is
1716 # supposed to take over, should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory to accommodate"
1731 " failovers should peer node %s fail", prinode)
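# Worked example (assumed sizes): if this node is secondary for two
# auto-balanced instances whose primary is "nodeA", needing 512 and 1024 MB
# of memory, then needed_mem = 1536 MB; ENODEN1 is raised if the node's
# reported free memory (mfree) is below that.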
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026     Cluster-Verify hooks are run only in the post phase; if they fail, their
2027     output is logged in the verify output and the verification fails.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
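# Note: the empty middle element means no node runs these hooks in the pre
# phase; every node runs them in the post phase, which matches the
# POST-only handling in HooksCallBack below.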
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
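# Hedged sketch of the structure being built: for a single DRBD instance
# "inst1" with primary "node1" and secondary "node2",
#   node_image["node1"].pinst would contain "inst1"
#   node_image["node2"].sinst would contain "inst1"
#   node_image["node2"].sbp   would be {"node1": ["inst1"]}
# i.e. sbp groups the secondary instances by their primary node.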
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
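# Descriptive note: the first node whose OS data came back usable is
# (apparently) kept as refos_img, so every later node's OS list gets
# compared against that reference image.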
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary so that situation is not well
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291     @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
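# With re.M the '^' anchor matches the start of every line, so the
# indent_re.sub(...) call further down prefixes each line of a hook
# script's output before it is passed to feedback_fn.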
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313           # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
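# Hedged example of the mapping built here: for an instance whose LVs live
# on "node1", nv_dict might look roughly like
#   {("node1", "xenvg/disk0_data"): <Instance ...>}
# entries are popped again below as each LV is confirmed online.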
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449     children have a smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
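# Worked example (hedged): for a DRBD8 disk of size 10240 whose data child
# reports 10000, the child's recorded size is bumped to 10240 and True is
# returned so the caller knows the configuration needs to be saved.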
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsize(node, newl)
2489 self.LogWarning("Failure in blockdev_getsize call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
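# Descriptive note: 'changed' collects (instance_name, disk_index, new_size)
# triples, presumably handed back to the caller so it can report which
# disks were corrected.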
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDict, _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_os", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_os", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2641 def CheckArguments(self):
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks whether the given params don't conflict and
2677 if the given volume group is valid.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693     # if vg_name is not None, check the given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2911 def helper_os(aname, mods, desc):
2913 lst = getattr(self.cluster, aname)
2914 for key, val in mods:
2915 if key == constants.DDM_ADD:
2917 feedback_fn("OS %s already in %s, ignoring", val, desc)
2920 elif key == constants.DDM_REMOVE:
2924 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2926 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2928 if self.op.hidden_os:
2929 helper_os("hidden_os", self.op.hidden_os, "hidden")
2931 if self.op.blacklisted_os:
2932 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2934 self.cfg.Update(self.cluster, feedback_fn)
2937 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2938 """Distribute additional files which are part of the cluster configuration.
2940 ConfigWriter takes care of distributing the config and ssconf files, but
2941 there are more files which should be distributed to all nodes. This function
2942 makes sure those are copied.
2944 @param lu: calling logical unit
2945 @param additional_nodes: list of nodes not in the config to distribute to
2948 # 1. Gather target nodes
2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2950 dist_nodes = lu.cfg.GetOnlineNodeList()
2951 if additional_nodes is not None:
2952 dist_nodes.extend(additional_nodes)
2953 if myself.name in dist_nodes:
2954 dist_nodes.remove(myself.name)
2956 # 2. Gather files to distribute
2957 dist_files = set([constants.ETC_HOSTS,
2958 constants.SSH_KNOWN_HOSTS_FILE,
2959 constants.RAPI_CERT_FILE,
2960 constants.RAPI_USERS_FILE,
2961 constants.CONFD_HMAC_KEY,
2962 constants.CLUSTER_DOMAIN_SECRET_FILE,
2965 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2966 for hv_name in enabled_hypervisors:
2967 hv_class = hypervisor.GetHypervisor(hv_name)
2968 dist_files.update(hv_class.GetAncillaryFiles())
2970 # 3. Perform the files upload
2971 for fname in dist_files:
2972 if os.path.exists(fname):
2973 result = lu.rpc.call_upload_file(dist_nodes, fname)
2974 for to_node, to_result in result.items():
2975 msg = to_result.fail_msg
2977 msg = ("Copy of file %s to node %s failed: %s" %
2978 (fname, to_node, msg))
2979 lu.proc.LogWarning(msg)
2982 class LURedistributeConfig(NoHooksLU):
2983 """Force the redistribution of cluster configuration.
2985 This is a very simple LU.
2990 def ExpandNames(self):
2991 self.needed_locks = {
2992 locking.LEVEL_NODE: locking.ALL_SET,
2994 self.share_locks[locking.LEVEL_NODE] = 1
2996 def Exec(self, feedback_fn):
2997 """Redistribute the configuration.
3000 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3001 _RedistributeAncillaryFiles(self)
3004 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3005 """Sleep and poll for an instance's disk to sync.
3008   if not instance.disks or (disks is not None and not disks):
3011 disks = _ExpandCheckDisks(instance, disks)
3014 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3016 node = instance.primary_node
3019 lu.cfg.SetDiskID(dev, node)
3021 # TODO: Convert to utils.Retry
3024 degr_retries = 10 # in seconds, as we sleep 1 second each time
3028 cumul_degraded = False
3029 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3030 msg = rstats.fail_msg
3032 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3035 raise errors.RemoteError("Can't contact node %s for mirror data,"
3036 " aborting." % node)
3039 rstats = rstats.payload
3041 for i, mstat in enumerate(rstats):
3043 lu.LogWarning("Can't compute data for node %s/%s",
3044 node, disks[i].iv_name)
3047 cumul_degraded = (cumul_degraded or
3048 (mstat.is_degraded and mstat.sync_percent is None))
3049 if mstat.sync_percent is not None:
3051 if mstat.estimated_time is not None:
3052 rem_time = ("%s remaining (estimated)" %
3053 utils.FormatSeconds(mstat.estimated_time))
3054 max_time = mstat.estimated_time
3056 rem_time = "no time estimate"
3057 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3058 (disks[i].iv_name, mstat.sync_percent, rem_time))
3060 # if we're done but degraded, let's do a few small retries, to
3061 # make sure we see a stable and not transient situation; therefore
3062     # we force a restart of the loop
3063 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3064 logging.info("Degraded disks found, %d retries left", degr_retries)
3072 time.sleep(min(60, max_time))
3075 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3076 return not cumul_degraded
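# Descriptive note: the boolean returned above is True only when no disk was
# still reported as degraded on the last poll, which callers presumably use
# to distinguish a clean sync from one that finished degraded.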
3079 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3080 """Check that mirrors are not degraded.
3082 The ldisk parameter, if True, will change the test from the
3083 is_degraded attribute (which represents overall non-ok status for
3084 the device(s)) to the ldisk (representing the local storage status).
3087 lu.cfg.SetDiskID(dev, node)
3091 if on_primary or dev.AssembleOnSecondary():
3092 rstats = lu.rpc.call_blockdev_find(node, dev)
3093 msg = rstats.fail_msg
3095 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3097 elif not rstats.payload:
3098 lu.LogWarning("Can't find disk on node %s", node)
3102 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3104 result = result and not rstats.payload.is_degraded
3107 for child in dev.children:
3108 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3113 class LUDiagnoseOS(NoHooksLU):
3114 """Logical unit for OS diagnose/query.
3119 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3123 _BLK = "blacklisted"
3125 _FIELDS_STATIC = utils.FieldSet()
3126 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3127 "parameters", "api_versions", _HID, _BLK)
3129 def CheckArguments(self):
3131 raise errors.OpPrereqError("Selective OS query not supported",
3134 _CheckOutputFields(static=self._FIELDS_STATIC,
3135 dynamic=self._FIELDS_DYNAMIC,
3136 selected=self.op.output_fields)
3138 def ExpandNames(self):
3139 # Lock all nodes, in shared mode
3140 # Temporary removal of locks, should be reverted later
3141 # TODO: reintroduce locks when they are lighter-weight
3142 self.needed_locks = {}
3143 #self.share_locks[locking.LEVEL_NODE] = 1
3144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3147 def _DiagnoseByOS(rlist):
3148 """Remaps a per-node return list into an a per-os per-node dictionary
3150 @param rlist: a map with node names as keys and OS objects as values
3153 @return: a dictionary with osnames as keys and as value another
3154 map, with nodes as keys and tuples of (path, status, diagnose,
3155 variants, parameters, api_versions) as values, eg::
3157 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3158 (/srv/..., False, "invalid api")],
3159 "node2": [(/srv/..., True, "", [], [])]}
3164 # we build here the list of nodes that didn't fail the RPC (at RPC
3165 # level), so that nodes with a non-responding node daemon don't
3166 # make all OSes invalid
3167 good_nodes = [node_name for node_name in rlist
3168 if not rlist[node_name].fail_msg]
3169 for node_name, nr in rlist.items():
3170 if nr.fail_msg or not nr.payload:
3172 for (name, path, status, diagnose, variants,
3173 params, api_versions) in nr.payload:
3174 if name not in all_os:
3175 # build a list of nodes for this os containing empty lists
3176 # for each node in node_list
3178 for nname in good_nodes:
3179 all_os[name][nname] = []
3180 # convert params from [name, help] to (name, help)
3181 params = [tuple(v) for v in params]
3182 all_os[name][node_name].append((path, status, diagnose,
3183 variants, params, api_versions))
3186 def Exec(self, feedback_fn):
3187 """Compute the list of OSes.
3190 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3191 node_data = self.rpc.call_os_diagnose(valid_nodes)
3192 pol = self._DiagnoseByOS(node_data)
3194 cluster = self.cfg.GetClusterInfo()
3196 for os_name in utils.NiceSort(pol.keys()):
3197 os_data = pol[os_name]
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3204 (variants, params, api_versions) = null_state
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0: # first entry
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else: # keep consistency
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3216 is_hid = os_name in cluster.hidden_os
3217 is_blk = os_name in cluster.blacklisted_os
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3223 for field in self.op.output_fields:
3226 elif field == self._VLD:
3228 elif field == "node_status":
3229 # this is just a copy of the dict
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = utils.NiceSort(list(variants))
3235 elif field == "parameters":
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3241 elif field == self._BLK:
3244 raise errors.ParameterError(field)
3251 class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node.
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3261 def BuildHooksEnv(self):
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3272 all_nodes = self.cfg.GetNodeList()
3274 all_nodes.remove(self.op.node_name)
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the all nodes list", self.op.node_name)
3278 return env, all_nodes, all_nodes
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3295 instance_list = self.cfg.GetInstanceList()
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3309 self.op.node_name = node.name
3312 def Exec(self, feedback_fn):
3313 """Removes the node from the cluster.
3317 logging.info("Stopping the node daemon and removing configs from node %s",
3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322 # Promote nodes to master candidate as needed
3323 _AdjustCandidatePool(self, exceptions=[node.name])
3324 self.context.RemoveNode(node.name)
3326 # Run post hooks on the node before it's removed
3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331 # pylint: disable-msg=W0702
3332 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3335 msg = result.fail_msg
3337 self.LogWarning("Errors encountered on the remote node while leaving"
3338 " the cluster: %s", msg)
3340 # Remove node from our /etc/hosts
3341 if self.cfg.GetClusterInfo().modify_etc_hosts:
3342 # FIXME: this should be done via an rpc call to node daemon
3343 utils.RemoveHostFromEtcHosts(node.name)
3344 _RedistributeAncillaryFiles(self)
3347 class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes.
3351 # pylint: disable-msg=W0142
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3364 "mtotal", "mnode", "mfree",
3366 "ctotal", "cnodes", "csockets",
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3374 "role"] + _SIMPLE_FIELDS
3377 def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC,
3379 dynamic=self._FIELDS_DYNAMIC,
3380 selected=self.op.output_fields)
3382 def ExpandNames(self):
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3389 self.wanted = locking.ALL_SET
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3394 # if we don't request only static fields, we need to lock the nodes
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397 def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3401 all_info = self.cfg.GetAllNodesInfo()
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3411 nodenames = all_info.keys()
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3416 # begin data gathering
3418 if self.do_node_query:
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3439 live_data[name] = {}
3441 live_data = dict.fromkeys(nodenames, {})
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3458 master_node = self.cfg.GetMasterNode()
3460 # end data gathering
3463 for node in nodelist:
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3489 elif node.master_candidate:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3505 class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s).
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3517 def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC,
3519 dynamic=self._FIELDS_DYNAMIC,
3520 selected=self.op.output_fields)
3522 def ExpandNames(self):
3523 self.needed_locks = {}
3524 self.share_locks[locking.LEVEL_NODE] = 1
3525 if not self.op.nodes:
3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528 self.needed_locks[locking.LEVEL_NODE] = \
3529 _GetWantedNodes(self, self.op.nodes)
3531 def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3544 for node in nodenames:
3545 nresult = volumes[node]
3548 msg = nresult.fail_msg
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3556 for vol in node_vols:
3558 for field in self.op.output_fields:
3561 elif field == "phys":
3565 elif field == "name":
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3571 if node not in lv_by_node[inst]:
3573 if vol['name'] in lv_by_node[inst][node]:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3582 output.append(node_output)
3587 class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s).
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3600 def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC,
3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3603 selected=self.op.output_fields)
3605 def ExpandNames(self):
3606 self.needed_locks = {}
3607 self.share_locks[locking.LEVEL_NODE] = 1
3610 self.needed_locks[locking.LEVEL_NODE] = \
3611 _GetWantedNodes(self, self.op.nodes)
3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615 def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621 # Always get name to sort by
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3625 fields = [constants.SF_NAME] + self.op.output_fields
3627 # Never ask for node or type as it's only known to the LU
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3647 msg = nresult.fail_msg
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3654 for name in utils.NiceSort(rows.keys()):
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3667 raise errors.ParameterError(field)
3676 class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node.
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3688 def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691 storage_type = self.op.storage_type
3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3697 " modified" % storage_type,
3700 diff = set(self.op.changes.keys()) - modifiable
3702 raise errors.OpPrereqError("The following fields can not be modified for"
3703 " storage units of type '%s': %r" %
3704 (storage_type, list(diff)),
3707 def ExpandNames(self):
3708 self.needed_locks = {
3709 locking.LEVEL_NODE: self.op.node_name,
3712 def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3724 class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster.
3729 HTYPE = constants.HTYPE_NODE
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3737 def CheckArguments(self):
3738 # validate/normalize the node name
3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741 def BuildHooksEnv(self):
3744 This will run on all nodes before, and on all nodes + the new node after.
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3757 def CheckPrereq(self):
3758 """Check prerequisites.
3761 - the new node is not already in the config
3763 - its parameters (single/dual homed) matches the cluster
3765 Any errors are signaled by raising errors.OpPrereqError.
3768 node_name = self.op.node_name
3771 dns_data = netutils.GetHostInfo(node_name)
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3780 secondary_ip = self.op.secondary_ip
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3790 self.changed_primary_ip = False
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3813 # check that the type of the node (single versus dual homed) is the
3814 # same as for the master
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3828 # checks reachability
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3833 if not newbie_singlehomed:
3834 # check reachability from my secondary ip to newbie's secondary ip
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3858 def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3862 new_node = self.new_node
3863 node = new_node.name
3865 # for re-adds, reset the offline/drained/master-candidate flags;
3866 # we need to reset here, otherwise offline would prevent RPC calls
3867 # later in the procedure; this also means that if the re-add
3868 # fails, we are left with a non-offlined, broken node
3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872 # if we demote the node, we do cleanup later in the procedure
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3877 # notify the user about any possible mc promotion
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3881 # check connectivity
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3902 keyarray.append(utils.ReadFile(i))
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3907 result.Raise("Cannot transfer ssh keys to the new node")
3909 # Add node to our /etc/hosts, and add key to known_hosts
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911 # FIXME: this should be done via an rpc call to node daemon
3912 utils.AddHostToEtcHosts(new_node.name)
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927 # TODO: do a node-net-test as well?
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945 # make sure we redistribute the config
3946 self.cfg.Update(new_node, feedback_fn)
3947 # and make sure the new node will not have old files around
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
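# Illustrative sketch, not part of the module: call_node_verify returns one
# result object per verifier node, and each payload maps NV_* keys to the
# per-check data.  For the NV_NODELIST check used above the entry is a dict
# of {failed_hostname: error_message}, so success means an empty dict:
#
#   nl_payload = result[verifier].payload[constants.NV_NODELIST]
#   if nl_payload:                       # at least one host failed
#     for failed in nl_payload:
#       feedback_fn("ssh/hostname verification failed (checking from %s): %s"
#                   % (verifier, nl_payload[failed]))
#     raise errors.OpExecError("ssh/hostname verification failed.")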
3959 class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node.
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3975 def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3986 # Boolean value that tells us whether we're offlining or draining the node
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
3994 self.lock_all = self.op.auto_promote and self.might_demote
3997 def ExpandNames(self):
3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003 def BuildHooksEnv(self):
4006 This runs on the master node.
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4015 nl = [self.cfg.GetMasterNode(),
4019 def CheckPrereq(self):
4020 """Check prerequisites.
4022 This only checks the node's state against the requested flag changes.
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030 # we can't change the master's node flags
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039 # check if after removing the current node, we're missing master
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4055 # If we're being deofflined/drained, we'll MC ourself if needed
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4064 def Exec(self, feedback_fn):
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4111 # we locked all nodes, we adjust the CP before updating this node
4113 _AdjustCandidatePool(self, [node.name])
4115 # this will trigger configuration file update, if needed
4116 self.cfg.Update(node, feedback_fn)
4118 # this will trigger job queue propagation or cleanup
4120 self.context.ReaddNode(node)
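# Illustrative sketch, not part of the module: Exec accumulates a list of
# (parameter, new value) pairs which is returned as the job result, so a
# drain request on a master candidate would yield something like
#
#   [("drained", "True"),
#    ("master_candidate", "auto-demotion due to drain")]
#
# letting callers see both the requested change and any implied demotions.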
4125 class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node.
4135 def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4138 raise errors.OpPrereqError("The node is the master and the force"
4139 " parameter was not set",
4142 def ExpandNames(self):
4143 """Locking for PowercycleNode.
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4149 self.needed_locks = {}
4151 def Exec(self, feedback_fn):
4155 result = self.rpc.call_node_powercycle(self.op.node_name,
4156 self.cfg.GetHypervisorType())
4157 result.Raise("Failed to schedule the reboot")
4158 return result.payload
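# Illustrative sketch, not part of the module: the RPC convention used above
# (and throughout this file) is "call, Raise() on failure, then use
# .payload".  A hypothetical stand-in result object would behave like:
#
#   class FakeResult(object):            # made-up class, for illustration
#     def __init__(self, fail_msg, payload):
#       self.fail_msg = fail_msg
#       self.payload = payload
#     def Raise(self, msg, **kwargs):
#       if self.fail_msg:
#         raise errors.OpExecError("%s: %s" % (msg, self.fail_msg))
#
#   res = FakeResult(None, "Reboot scheduled")
#   res.Raise("Failed to schedule the reboot")   # no-op on success
#   res.payload                                  # -> "Reboot scheduled"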
4161 class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration.
4167 def ExpandNames(self):
4168 self.needed_locks = {}
4170 def Exec(self, feedback_fn):
4171 """Return cluster config.
4174 cluster = self.cfg.GetClusterInfo()
4177 # Filter just for enabled hypervisors
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4219 class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values.
4223 _OP_PARAMS = [_POutputFields]
4225 _FIELDS_DYNAMIC = utils.FieldSet()
4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4227 "watcher_pause", "volume_group_name")
4229 def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC,
4231 dynamic=self._FIELDS_DYNAMIC,
4232 selected=self.op.output_fields)
4234 def ExpandNames(self):
4235 self.needed_locks = {}
4237 def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output.
4242 for field in self.op.output_fields:
4243 if field == "cluster_name":
4244 entry = self.cfg.GetClusterName()
4245 elif field == "master_node":
4246 entry = self.cfg.GetMasterNode()
4247 elif field == "drain_flag":
4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4249 elif field == "watcher_pause":
4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4251 elif field == "volume_group_name":
4252 entry = self.cfg.GetVGName()
4254 raise errors.ParameterError(field)
4255 values.append(entry)
4259 class LUActivateInstanceDisks(NoHooksLU):
4260 """Bring up an instance's disks.
4265 ("ignore_size", False, _TBool),
4269 def ExpandNames(self):
4270 self._ExpandAndLockInstance()
4271 self.needed_locks[locking.LEVEL_NODE] = []
4272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4274 def DeclareLocks(self, level):
4275 if level == locking.LEVEL_NODE:
4276 self._LockInstancesNodes()
4278 def CheckPrereq(self):
4279 """Check prerequisites.
4281 This checks that the instance is in the cluster.
4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285 assert self.instance is not None, \
4286 "Cannot retrieve locked instance %s" % self.op.instance_name
4287 _CheckNodeOnline(self, self.instance.primary_node)
4289 def Exec(self, feedback_fn):
4290 """Activate the disks.
4293 disks_ok, disks_info = \
4294 _AssembleInstanceDisks(self, self.instance,
4295 ignore_size=self.op.ignore_size)
4297 raise errors.OpExecError("Cannot activate block devices")
4302 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4304 """Prepare the block devices for an instance.
4306 This sets up the block devices on all nodes.
4308 @type lu: L{LogicalUnit}
4309 @param lu: the logical unit on whose behalf we execute
4310 @type instance: L{objects.Instance}
4311 @param instance: the instance for whose disks we assemble
4312 @type disks: list of L{objects.Disk} or None
4313 @param disks: which disks to assemble (or all, if None)
4314 @type ignore_secondaries: boolean
4315 @param ignore_secondaries: if true, errors on secondary nodes
4316 won't result in an error return from the function
4317 @type ignore_size: boolean
4318 @param ignore_size: if true, the current known size of the disk
4319 will not be used during the disk activation, useful for cases
4320 when the size is wrong
4321 @return: a tuple of (disks_ok, device_info); device_info is a list of
4322 (host, instance_visible_name, node_visible_name) tuples with the
4323 mapping from node devices to instance devices
4328 iname = instance.name
4329 disks = _ExpandCheckDisks(instance, disks)
4331 # With the two-pass mechanism we try to reduce the window of
4332 # opportunity for the race condition of switching DRBD to primary
4333 # before handshaking has occurred, but we do not eliminate it
4335 # The proper fix would be to wait (with some limits) until the
4336 # connection has been made and drbd transitions from WFConnection
4337 # into any other network-connected state (Connected, SyncTarget,
4340 # 1st pass, assemble on all nodes in secondary mode
4341 for inst_disk in disks:
4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4344 node_disk = node_disk.Copy()
4345 node_disk.UnsetSize()
4346 lu.cfg.SetDiskID(node_disk, node)
4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348 msg = result.fail_msg
4350 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351 " (is_primary=False, pass=1): %s",
4352 inst_disk.iv_name, node, msg)
4353 if not ignore_secondaries:
4356 # FIXME: race condition on drbd migration to primary
4358 # 2nd pass, do only the primary node
4359 for inst_disk in disks:
4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363 if node != instance.primary_node:
4366 node_disk = node_disk.Copy()
4367 node_disk.UnsetSize()
4368 lu.cfg.SetDiskID(node_disk, node)
4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370 msg = result.fail_msg
4372 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373 " (is_primary=True, pass=2): %s",
4374 inst_disk.iv_name, node, msg)
4377 dev_path = result.payload
4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4381 # leave the disks configured for the primary node
4382 # this is a workaround that would be fixed better by
4383 # improving the logical/physical id handling
4385 lu.cfg.SetDiskID(disk, instance.primary_node)
4387 return disks_ok, device_info
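# Illustrative sketch, not part of the module: callers unpack the returned
# (disks_ok, device_info) tuple and abort when assembly failed, in the same
# spirit as _StartInstanceDisks below:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s of the instance is visible on %s as %s",
#                iv_name, node, dev_path)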
4390 def _StartInstanceDisks(lu, instance, force):
4391 """Start the disks of an instance.
4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395 ignore_secondaries=force)
4397 _ShutdownInstanceDisks(lu, instance)
4398 if force is not None and not force:
4399 lu.proc.LogWarning("", hint="If the message above refers to a"
4401 " you can retry the operation using '--force'.")
4402 raise errors.OpExecError("Disk consistency error")
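# Illustrative sketch, not part of the module: the "force" argument serves
# two purposes above.  It is forwarded unchanged to _AssembleInstanceDisks as
# ignore_secondaries, and it controls the hint printed on failure: None
# (used by reinstall/rename) means there is no user-visible --force option,
# so no hint is printed; False prints the "retry with --force" hint before
# raising:
#
#   _StartInstanceDisks(lu, instance, None)       # never print the hint
#   _StartInstanceDisks(lu, instance, op.force)   # hint only if force is False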
4405 class LUDeactivateInstanceDisks(NoHooksLU):
4406 """Shutdown an instance's disks.
4414 def ExpandNames(self):
4415 self._ExpandAndLockInstance()
4416 self.needed_locks[locking.LEVEL_NODE] = []
4417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4419 def DeclareLocks(self, level):
4420 if level == locking.LEVEL_NODE:
4421 self._LockInstancesNodes()
4423 def CheckPrereq(self):
4424 """Check prerequisites.
4426 This checks that the instance is in the cluster.
4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430 assert self.instance is not None, \
4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4433 def Exec(self, feedback_fn):
4434 """Deactivate the disks
4437 instance = self.instance
4438 _SafeShutdownInstanceDisks(self, instance)
4441 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442 """Shutdown block devices of an instance.
4444 This function checks if an instance is running, before calling
4445 _ShutdownInstanceDisks.
4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4452 def _ExpandCheckDisks(instance, disks):
4453 """Return the instance disks selected by the disks list
4455 @type disks: list of L{objects.Disk} or None
4456 @param disks: selected disks
4457 @rtype: list of L{objects.Disk}
4458 @return: selected instance disks to act on
4462 return instance.disks
4464 if not set(disks).issubset(instance.disks):
4465 raise errors.ProgrammerError("Can only act on disks belonging to the"
4470 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471 """Shutdown block devices of an instance.
4473 This does the shutdown on all nodes of the instance.
4475 If ignore_primary is true, errors on the primary node are ignored.
4480 disks = _ExpandCheckDisks(instance, disks)
4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484 lu.cfg.SetDiskID(top_disk, node)
4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486 msg = result.fail_msg
4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489 disk.iv_name, node, msg)
4490 if not ignore_primary or node != instance.primary_node:
4495 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496 """Checks if a node has enough free memory.
4498 This function checks if a given node has the needed amount of free
4499 memory. In case the node has less memory or we cannot get the
4500 information from the node, this function raises an OpPrereqError
4503 @type lu: C{LogicalUnit}
4504 @param lu: a logical unit from which we get configuration data
4506 @param node: the node to check
4507 @type reason: C{str}
4508 @param reason: string to use in the error message
4509 @type requested: C{int}
4510 @param requested: the amount of memory in MiB to check for
4511 @type hypervisor_name: C{str}
4512 @param hypervisor_name: the hypervisor to ask for memory stats
4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514 we cannot check the node
4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518 nodeinfo[node].Raise("Can't get data from node %s" % node,
4519 prereq=True, ecode=errors.ECODE_ENVIRON)
4520 free_mem = nodeinfo[node].payload.get('memory_free', None)
4521 if not isinstance(free_mem, int):
4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523 " was '%s'" % (node, free_mem),
4524 errors.ECODE_ENVIRON)
4525 if requested > free_mem:
4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527 " needed %s MiB, available %s MiB" %
4528 (node, reason, requested, free_mem),
4532 def _CheckNodesFreeDisk(lu, nodenames, requested):
4533 """Checks if nodes have enough free disk space in the default VG.
4535 This function checks if all given nodes have the needed amount of
4536 free disk. In case any node has less disk or we cannot get the
4537 information from the node, this function raises an OpPrereqError
4540 @type lu: C{LogicalUnit}
4541 @param lu: a logical unit from which we get configuration data
4542 @type nodenames: C{list}
4543 @param nodenames: the list of node names to check
4544 @type requested: C{int}
4545 @param requested: the amount of disk in MiB to check for
4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547 we cannot check the node
4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551 lu.cfg.GetHypervisorType())
4552 for node in nodenames:
4553 info = nodeinfo[node]
4554 info.Raise("Cannot get current information from node %s" % node,
4555 prereq=True, ecode=errors.ECODE_ENVIRON)
4556 vg_free = info.payload.get("vg_free", None)
4557 if not isinstance(vg_free, int):
4558 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559 " result was '%s'" % (node, vg_free),
4560 errors.ECODE_ENVIRON)
4561 if requested > vg_free:
4562 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563 " required %d MiB, available %d MiB" %
4564 (node, requested, vg_free),
4568 class LUStartupInstance(LogicalUnit):
4569 """Starts an instance.
4572 HPATH = "instance-start"
4573 HTYPE = constants.HTYPE_INSTANCE
4577 ("hvparams", _EmptyDict, _TDict),
4578 ("beparams", _EmptyDict, _TDict),
4582 def CheckArguments(self):
4584 if self.op.beparams:
4585 # fill the beparams dict
4586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4594 This runs on master, primary and secondary nodes of the instance.
4598 "FORCE": self.op.force,
4600 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4607 This checks that the instance is in the cluster.
4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611 assert self.instance is not None, \
4612 "Cannot retrieve locked instance %s" % self.op.instance_name
4615 if self.op.hvparams:
4616 # check hypervisor parameter syntax (locally)
4617 cluster = self.cfg.GetClusterInfo()
4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619 filled_hvp = cluster.FillHV(instance)
4620 filled_hvp.update(self.op.hvparams)
4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622 hv_type.CheckParameterSyntax(filled_hvp)
4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4625 _CheckNodeOnline(self, instance.primary_node)
4627 bep = self.cfg.GetClusterInfo().FillBE(instance)
4628 # check bridges existence
4629 _CheckInstanceBridgesExist(self, instance)
4631 remote_info = self.rpc.call_instance_info(instance.primary_node,
4633 instance.hypervisor)
4634 remote_info.Raise("Error checking node %s" % instance.primary_node,
4635 prereq=True, ecode=errors.ECODE_ENVIRON)
4636 if not remote_info.payload: # not running already
4637 _CheckNodeFreeMemory(self, instance.primary_node,
4638 "starting instance %s" % instance.name,
4639 bep[constants.BE_MEMORY], instance.hypervisor)
4641 def Exec(self, feedback_fn):
4642 """Start the instance.
4645 instance = self.instance
4646 force = self.op.force
4648 self.cfg.MarkInstanceUp(instance.name)
4650 node_current = instance.primary_node
4652 _StartInstanceDisks(self, instance, force)
4654 result = self.rpc.call_instance_start(node_current, instance,
4655 self.op.hvparams, self.op.beparams)
4656 msg = result.fail_msg
4658 _ShutdownInstanceDisks(self, instance)
4659 raise errors.OpExecError("Could not start instance: %s" % msg)
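# Illustrative sketch, not part of the module: the hvparams/beparams opcode
# fields are one-off overrides for this start only.  CheckPrereq validates
# them against the fully filled dictionaries, while Exec forwards just the
# raw overrides to the node; e.g. (values made up):
#
#   op.beparams = {constants.BE_MEMORY: 2048}
#   op.hvparams = {}
#   # -> call_instance_start(node_current, instance, {}, {"memory": 2048})
#
# The overrides apply to this start only; they are not written back to the
# instance's configuration.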
4662 class LURebootInstance(LogicalUnit):
4663 """Reboot an instance.
4666 HPATH = "instance-reboot"
4667 HTYPE = constants.HTYPE_INSTANCE
4670 ("ignore_secondaries", False, _TBool),
4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4676 def ExpandNames(self):
4677 self._ExpandAndLockInstance()
4679 def BuildHooksEnv(self):
4682 This runs on master, primary and secondary nodes of the instance.
4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687 "REBOOT_TYPE": self.op.reboot_type,
4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4694 def CheckPrereq(self):
4695 """Check prerequisites.
4697 This checks that the instance is in the cluster.
4700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4701 assert self.instance is not None, \
4702 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, instance.primary_node)
4706 # check bridges existence
4707 _CheckInstanceBridgesExist(self, instance)
4709 def Exec(self, feedback_fn):
4710 """Reboot the instance.
4713 instance = self.instance
4714 ignore_secondaries = self.op.ignore_secondaries
4715 reboot_type = self.op.reboot_type
4717 node_current = instance.primary_node
4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720 constants.INSTANCE_REBOOT_HARD]:
4721 for disk in instance.disks:
4722 self.cfg.SetDiskID(disk, node_current)
4723 result = self.rpc.call_instance_reboot(node_current, instance,
4725 self.op.shutdown_timeout)
4726 result.Raise("Could not reboot instance")
4728 result = self.rpc.call_instance_shutdown(node_current, instance,
4729 self.op.shutdown_timeout)
4730 result.Raise("Could not shutdown instance for full reboot")
4731 _ShutdownInstanceDisks(self, instance)
4732 _StartInstanceDisks(self, instance, ignore_secondaries)
4733 result = self.rpc.call_instance_start(node_current, instance, None, None)
4734 msg = result.fail_msg
4736 _ShutdownInstanceDisks(self, instance)
4737 raise errors.OpExecError("Could not start instance for"
4738 " full reboot: %s" % msg)
4740 self.cfg.MarkInstanceUp(instance.name)
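# Illustrative sketch, not part of the module: the reboot types take
# different paths above.  Soft and hard reboots are a single RPC (the reboot
# type is forwarded to the node daemon, which handles the distinction), while
# any other allowed type (the full reboot) is emulated from the master side:
#
#   INSTANCE_REBOOT_SOFT / INSTANCE_REBOOT_HARD -> rpc.call_instance_reboot
#   full reboot                                 -> call_instance_shutdown,
#                                                  _ShutdownInstanceDisks,
#                                                  _StartInstanceDisks,
#                                                  call_instance_start
#
# Only the full reboot therefore touches the block devices.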
4743 class LUShutdownInstance(LogicalUnit):
4744 """Shutdown an instance.
4747 HPATH = "instance-stop"
4748 HTYPE = constants.HTYPE_INSTANCE
4751 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4755 def ExpandNames(self):
4756 self._ExpandAndLockInstance()
4758 def BuildHooksEnv(self):
4761 This runs on master, primary and secondary nodes of the instance.
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["TIMEOUT"] = self.op.timeout
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster.
4775 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776 assert self.instance is not None, \
4777 "Cannot retrieve locked instance %s" % self.op.instance_name
4778 _CheckNodeOnline(self, self.instance.primary_node)
4780 def Exec(self, feedback_fn):
4781 """Shutdown the instance.
4784 instance = self.instance
4785 node_current = instance.primary_node
4786 timeout = self.op.timeout
4787 self.cfg.MarkInstanceDown(instance.name)
4788 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4789 msg = result.fail_msg
4791 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4793 _ShutdownInstanceDisks(self, instance)
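# Illustrative sketch, not part of the module: note the ordering above.  The
# instance is marked down in the configuration before the shutdown RPC, and
# an RPC failure only produces a warning, so the disk deactivation runs in
# every case:
#
#   self.cfg.MarkInstanceDown(instance.name)   # config first
#   result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
#   if result.fail_msg:
#     self.proc.LogWarning("Could not shutdown instance: %s" % msg)
#   _ShutdownInstanceDisks(self, instance)     # always reached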
4796 class LUReinstallInstance(LogicalUnit):
4797 """Reinstall an instance.
4800 HPATH = "instance-reinstall"
4801 HTYPE = constants.HTYPE_INSTANCE
4804 ("os_type", None, _TMaybeString),
4805 ("force_variant", False, _TBool),
4809 def ExpandNames(self):
4810 self._ExpandAndLockInstance()
4812 def BuildHooksEnv(self):
4815 This runs on master, primary and secondary nodes of the instance.
4818 env = _BuildInstanceHookEnvByObject(self, self.instance)
4819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4822 def CheckPrereq(self):
4823 """Check prerequisites.
4825 This checks that the instance is in the cluster and is not running.
4828 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4829 assert instance is not None, \
4830 "Cannot retrieve locked instance %s" % self.op.instance_name
4831 _CheckNodeOnline(self, instance.primary_node)
4833 if instance.disk_template == constants.DT_DISKLESS:
4834 raise errors.OpPrereqError("Instance '%s' has no disks" %
4835 self.op.instance_name,
4837 _CheckInstanceDown(self, instance, "cannot reinstall")
4839 if self.op.os_type is not None:
4841 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4842 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847 """Reinstall the instance.
4850 inst = self.instance
4852 if self.op.os_type is not None:
4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854 inst.os = self.op.os_type
4855 self.cfg.Update(inst, feedback_fn)
4857 _StartInstanceDisks(self, inst, None)
4859 feedback_fn("Running the instance OS create scripts...")
4860 # FIXME: pass debug option from opcode to backend
4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862 self.op.debug_level)
4863 result.Raise("Could not install OS for instance %s on node %s" %
4864 (inst.name, inst.primary_node))
4866 _ShutdownInstanceDisks(self, inst)
4869 class LURecreateInstanceDisks(LogicalUnit):
4870 """Recreate an instance's missing disks.
4873 HPATH = "instance-recreate-disks"
4874 HTYPE = constants.HTYPE_INSTANCE
4877 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 def BuildHooksEnv(self):
4887 This runs on master, primary and secondary nodes of the instance.
4890 env = _BuildInstanceHookEnvByObject(self, self.instance)
4891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4894 def CheckPrereq(self):
4895 """Check prerequisites.
4897 This checks that the instance is in the cluster and is not running.
4900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4901 assert instance is not None, \
4902 "Cannot retrieve locked instance %s" % self.op.instance_name
4903 _CheckNodeOnline(self, instance.primary_node)
4905 if instance.disk_template == constants.DT_DISKLESS:
4906 raise errors.OpPrereqError("Instance '%s' has no disks" %
4907 self.op.instance_name, errors.ECODE_INVAL)
4908 _CheckInstanceDown(self, instance, "cannot recreate disks")
4910 if not self.op.disks:
4911 self.op.disks = range(len(instance.disks))
4913 for idx in self.op.disks:
4914 if idx >= len(instance.disks):
4915 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4918 self.instance = instance
4920 def Exec(self, feedback_fn):
4921 """Recreate the disks.
4925 for idx, _ in enumerate(self.instance.disks):
4926 if idx not in self.op.disks: # disk idx has not been passed in
4930 _CreateDisks(self, self.instance, to_skip=to_skip)
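# Illustrative sketch, not part of the module: to_skip is the complement of
# op.disks, so only the requested indices are recreated.  For a three-disk
# instance and op.disks = [1]:
#
#   to_skip = [idx for idx, _ in enumerate(instance.disks)
#              if idx not in op.disks]          # -> [0, 2]
#   _CreateDisks(self, instance, to_skip=to_skip)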
4933 class LURenameInstance(LogicalUnit):
4934 """Rename an instance.
4937 HPATH = "instance-rename"
4938 HTYPE = constants.HTYPE_INSTANCE
4941 ("new_name", _NoDefault, _TNonEmptyString),
4942 ("ip_check", False, _TBool),
4943 ("name_check", True, _TBool),
4946 def CheckArguments(self):
4950 if self.op.ip_check and not self.op.name_check:
4951 # TODO: make the ip check more flexible and not depend on the name check
4952 raise errors.OpPrereqError("Cannot do ip check without a name check",
4955 def BuildHooksEnv(self):
4958 This runs on master, primary and secondary nodes of the instance.
4961 env = _BuildInstanceHookEnvByObject(self, self.instance)
4962 env["INSTANCE_NEW_NAME"] = self.op.new_name
4963 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4966 def CheckPrereq(self):
4967 """Check prerequisites.
4969 This checks that the instance is in the cluster and is not running.
4972 self.op.instance_name = _ExpandInstanceName(self.cfg,
4973 self.op.instance_name)
4974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975 assert instance is not None
4976 _CheckNodeOnline(self, instance.primary_node)
4977 _CheckInstanceDown(self, instance, "cannot rename")
4978 self.instance = instance
4980 new_name = self.op.new_name
4981 if self.op.name_check:
4982 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4983 new_name = self.op.new_name = hostinfo.name
4984 if (self.op.ip_check and
4985 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4986 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4987 (hostinfo.ip, new_name),
4988 errors.ECODE_NOTUNIQUE)
4990 instance_list = self.cfg.GetInstanceList()
4991 if new_name in instance_list:
4992 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4993 new_name, errors.ECODE_EXISTS)
4996 def Exec(self, feedback_fn):
4997 """Reinstall the instance.
5000 inst = self.instance
5001 old_name = inst.name
5003 if inst.disk_template == constants.DT_FILE:
5004 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5006 self.cfg.RenameInstance(inst.name, self.op.new_name)
5007 # Change the instance lock. This is definitely safe while we hold the BGL
5008 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5009 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5011 # re-read the instance from the configuration after rename
5012 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5014 if inst.disk_template == constants.DT_FILE:
5015 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5016 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5017 old_file_storage_dir,
5018 new_file_storage_dir)
5019 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5020 " (but the instance has been renamed in Ganeti)" %
5021 (inst.primary_node, old_file_storage_dir,
5022 new_file_storage_dir))
5024 _StartInstanceDisks(self, inst, None)
5026 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5027 old_name, self.op.debug_level)
5028 msg = result.fail_msg
5030 msg = ("Could not run OS rename script for instance %s on node %s"
5031 " (but the instance has been renamed in Ganeti): %s" %
5032 (inst.name, inst.primary_node, msg))
5033 self.proc.LogWarning(msg)
5035 _ShutdownInstanceDisks(self, inst)
5040 class LURemoveInstance(LogicalUnit):
5041 """Remove an instance.
5044 HPATH = "instance-remove"
5045 HTYPE = constants.HTYPE_INSTANCE
5048 ("ignore_failures", False, _TBool),
5053 def ExpandNames(self):
5054 self._ExpandAndLockInstance()
5055 self.needed_locks[locking.LEVEL_NODE] = []
5056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5058 def DeclareLocks(self, level):
5059 if level == locking.LEVEL_NODE:
5060 self._LockInstancesNodes()
5062 def BuildHooksEnv(self):
5065 This runs on master, primary and secondary nodes of the instance.
5068 env = _BuildInstanceHookEnvByObject(self, self.instance)
5069 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5070 nl = [self.cfg.GetMasterNode()]
5071 nl_post = list(self.instance.all_nodes) + nl
5072 return env, nl, nl_post
5074 def CheckPrereq(self):
5075 """Check prerequisites.
5077 This checks that the instance is in the cluster.
5080 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5081 assert self.instance is not None, \
5082 "Cannot retrieve locked instance %s" % self.op.instance_name
5084 def Exec(self, feedback_fn):
5085 """Remove the instance.
5088 instance = self.instance
5089 logging.info("Shutting down instance %s on node %s",
5090 instance.name, instance.primary_node)
5092 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5093 self.op.shutdown_timeout)
5094 msg = result.fail_msg
5096 if self.op.ignore_failures:
5097 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5099 raise errors.OpExecError("Could not shutdown instance %s on"
5101 (instance.name, instance.primary_node, msg))
5103 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5106 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5107 """Utility function to remove an instance.
5110 logging.info("Removing block devices for instance %s", instance.name)
5112 if not _RemoveDisks(lu, instance):
5113 if not ignore_failures:
5114 raise errors.OpExecError("Can't remove instance's disks")
5115 feedback_fn("Warning: can't remove instance's disks")
5117 logging.info("Removing instance %s out of cluster config", instance.name)
5119 lu.cfg.RemoveInstance(instance.name)
5121 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5122 "Instance lock removal conflict"
5124 # Remove lock for the instance
5125 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5128 class LUQueryInstances(NoHooksLU):
5129 """Logical unit for querying instances.
5132 # pylint: disable-msg=W0142
5134 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5135 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5136 ("use_locking", False, _TBool),
5139 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5140 "serial_no", "ctime", "mtime", "uuid"]
5141 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5143 "disk_template", "ip", "mac", "bridge",
5144 "nic_mode", "nic_link",
5145 "sda_size", "sdb_size", "vcpus", "tags",
5146 "network_port", "beparams",
5147 r"(disk)\.(size)/([0-9]+)",
5148 r"(disk)\.(sizes)", "disk_usage",
5149 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5150 r"(nic)\.(bridge)/([0-9]+)",
5151 r"(nic)\.(macs|ips|modes|links|bridges)",
5152 r"(disk|nic)\.(count)",
5154 ] + _SIMPLE_FIELDS +
5156 for name in constants.HVS_PARAMETERS
5157 if name not in constants.HVC_GLOBALS] +
5159 for name in constants.BES_PARAMETERS])
5160 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5166 def CheckArguments(self):
5167 _CheckOutputFields(static=self._FIELDS_STATIC,
5168 dynamic=self._FIELDS_DYNAMIC,
5169 selected=self.op.output_fields)
5171 def ExpandNames(self):
5172 self.needed_locks = {}
5173 self.share_locks[locking.LEVEL_INSTANCE] = 1
5174 self.share_locks[locking.LEVEL_NODE] = 1
5177 self.wanted = _GetWantedInstances(self, self.op.names)
5179 self.wanted = locking.ALL_SET
5181 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5182 self.do_locking = self.do_node_query and self.op.use_locking
5184 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5185 self.needed_locks[locking.LEVEL_NODE] = []
5186 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5188 def DeclareLocks(self, level):
5189 if level == locking.LEVEL_NODE and self.do_locking:
5190 self._LockInstancesNodes()
5192 def Exec(self, feedback_fn):
5193 """Computes the list of nodes and their attributes.
5196 # pylint: disable-msg=R0912
5197 # way too many branches here
5198 all_info = self.cfg.GetAllInstancesInfo()
5199 if self.wanted == locking.ALL_SET:
5200 # caller didn't specify instance names, so ordering is not important
5202 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5204 instance_names = all_info.keys()
5205 instance_names = utils.NiceSort(instance_names)
5207 # caller did specify names, so we must keep the ordering
5209 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5211 tgt_set = all_info.keys()
5212 missing = set(self.wanted).difference(tgt_set)
5214 raise errors.OpExecError("Some instances were removed before"
5215 " retrieving their data: %s" % missing)
5216 instance_names = self.wanted
5218 instance_list = [all_info[iname] for iname in instance_names]
5220 # begin data gathering
5222 nodes = frozenset([inst.primary_node for inst in instance_list])
5223 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5227 if self.do_node_query:
5229 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5231 result = node_data[name]
5233 # offline nodes will be in both lists
5234 off_nodes.append(name)
5236 bad_nodes.append(name)
5239 live_data.update(result.payload)
5240 # else no instance is alive
5242 live_data = dict([(name, {}) for name in instance_names])
5244 # end data gathering
5249 cluster = self.cfg.GetClusterInfo()
5250 for instance in instance_list:
5252 i_hv = cluster.FillHV(instance, skip_globals=True)
5253 i_be = cluster.FillBE(instance)
5254 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5255 for field in self.op.output_fields:
5256 st_match = self._FIELDS_STATIC.Matches(field)
5257 if field in self._SIMPLE_FIELDS:
5258 val = getattr(instance, field)
5259 elif field == "pnode":
5260 val = instance.primary_node
5261 elif field == "snodes":
5262 val = list(instance.secondary_nodes)
5263 elif field == "admin_state":
5264 val = instance.admin_up
5265 elif field == "oper_state":
5266 if instance.primary_node in bad_nodes:
5269 val = bool(live_data.get(instance.name))
5270 elif field == "status":
5271 if instance.primary_node in off_nodes:
5272 val = "ERROR_nodeoffline"
5273 elif instance.primary_node in bad_nodes:
5274 val = "ERROR_nodedown"
5276 running = bool(live_data.get(instance.name))
5278 if instance.admin_up:
5283 if instance.admin_up:
5287 elif field == "oper_ram":
5288 if instance.primary_node in bad_nodes:
5290 elif instance.name in live_data:
5291 val = live_data[instance.name].get("memory", "?")
5294 elif field == "oper_vcpus":
5295 if instance.primary_node in bad_nodes:
5297 elif instance.name in live_data:
5298 val = live_data[instance.name].get("vcpus", "?")
5301 elif field == "vcpus":
5302 val = i_be[constants.BE_VCPUS]
5303 elif field == "disk_template":
5304 val = instance.disk_template
5307 val = instance.nics[0].ip
5310 elif field == "nic_mode":
5312 val = i_nicp[0][constants.NIC_MODE]
5315 elif field == "nic_link":
5317 val = i_nicp[0][constants.NIC_LINK]
5320 elif field == "bridge":
5321 if (instance.nics and
5322 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5323 val = i_nicp[0][constants.NIC_LINK]
5326 elif field == "mac":
5328 val = instance.nics[0].mac
5331 elif field == "sda_size" or field == "sdb_size":
5332 idx = ord(field[2]) - ord('a')
5334 val = instance.FindDisk(idx).size
5335 except errors.OpPrereqError:
5337 elif field == "disk_usage": # total disk usage per node
5338 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5339 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5340 elif field == "tags":
5341 val = list(instance.GetTags())
5342 elif field == "hvparams":
5344 elif (field.startswith(HVPREFIX) and
5345 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5346 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5347 val = i_hv.get(field[len(HVPREFIX):], None)
5348 elif field == "beparams":
5350 elif (field.startswith(BEPREFIX) and
5351 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5352 val = i_be.get(field[len(BEPREFIX):], None)
5353 elif st_match and st_match.groups():
5354 # matches a variable list
5355 st_groups = st_match.groups()
5356 if st_groups and st_groups[0] == "disk":
5357 if st_groups[1] == "count":
5358 val = len(instance.disks)
5359 elif st_groups[1] == "sizes":
5360 val = [disk.size for disk in instance.disks]
5361 elif st_groups[1] == "size":
5363 val = instance.FindDisk(st_groups[2]).size
5364 except errors.OpPrereqError:
5367 assert False, "Unhandled disk parameter"
5368 elif st_groups[0] == "nic":
5369 if st_groups[1] == "count":
5370 val = len(instance.nics)
5371 elif st_groups[1] == "macs":
5372 val = [nic.mac for nic in instance.nics]
5373 elif st_groups[1] == "ips":
5374 val = [nic.ip for nic in instance.nics]
5375 elif st_groups[1] == "modes":
5376 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5377 elif st_groups[1] == "links":
5378 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5379 elif st_groups[1] == "bridges":
5382 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5383 val.append(nicp[constants.NIC_LINK])
5388 nic_idx = int(st_groups[2])
5389 if nic_idx >= len(instance.nics):
5392 if st_groups[1] == "mac":
5393 val = instance.nics[nic_idx].mac
5394 elif st_groups[1] == "ip":
5395 val = instance.nics[nic_idx].ip
5396 elif st_groups[1] == "mode":
5397 val = i_nicp[nic_idx][constants.NIC_MODE]
5398 elif st_groups[1] == "link":
5399 val = i_nicp[nic_idx][constants.NIC_LINK]
5400 elif st_groups[1] == "bridge":
5401 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5402 if nic_mode == constants.NIC_MODE_BRIDGED:
5403 val = i_nicp[nic_idx][constants.NIC_LINK]
5407 assert False, "Unhandled NIC parameter"
5409 assert False, ("Declared but unhandled variable parameter '%s'" %
5412 assert False, "Declared but unhandled parameter '%s'" % field
5419 class LUFailoverInstance(LogicalUnit):
5420 """Failover an instance.
5423 HPATH = "instance-failover"
5424 HTYPE = constants.HTYPE_INSTANCE
5427 ("ignore_consistency", False, _TBool),
5432 def ExpandNames(self):
5433 self._ExpandAndLockInstance()
5434 self.needed_locks[locking.LEVEL_NODE] = []
5435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5437 def DeclareLocks(self, level):
5438 if level == locking.LEVEL_NODE:
5439 self._LockInstancesNodes()
5441 def BuildHooksEnv(self):
5444 This runs on master, primary and secondary nodes of the instance.
5447 instance = self.instance
5448 source_node = instance.primary_node
5449 target_node = instance.secondary_nodes[0]
5451 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5453 "OLD_PRIMARY": source_node,
5454 "OLD_SECONDARY": target_node,
5455 "NEW_PRIMARY": target_node,
5456 "NEW_SECONDARY": source_node,
5458 env.update(_BuildInstanceHookEnvByObject(self, instance))
5459 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5461 nl_post.append(source_node)
5462 return env, nl, nl_post
5464 def CheckPrereq(self):
5465 """Check prerequisites.
5467 This checks that the instance is in the cluster.
5470 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471 assert self.instance is not None, \
5472 "Cannot retrieve locked instance %s" % self.op.instance_name
5474 bep = self.cfg.GetClusterInfo().FillBE(instance)
5475 if instance.disk_template not in constants.DTS_NET_MIRROR:
5476 raise errors.OpPrereqError("Instance's disk layout is not"
5477 " network mirrored, cannot failover.",
5480 secondary_nodes = instance.secondary_nodes
5481 if not secondary_nodes:
5482 raise errors.ProgrammerError("no secondary node but using "
5483 "a mirrored disk template")
5485 target_node = secondary_nodes[0]
5486 _CheckNodeOnline(self, target_node)
5487 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489 # check memory requirements on the secondary node
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5497 # check bridge existence
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5500 def Exec(self, feedback_fn):
5501 """Failover an instance.
5503 The failover is done by shutting it down on its present node and
5504 starting it on the secondary.
5507 instance = self.instance
5509 source_node = instance.primary_node
5510 target_node = instance.secondary_nodes[0]
5512 if instance.admin_up:
5513 feedback_fn("* checking disk consistency between source and target")
5514 for dev in instance.disks:
5515 # for drbd, these are drbd over lvm
5516 if not _CheckDiskConsistency(self, dev, target_node, False):
5517 if not self.op.ignore_consistency:
5518 raise errors.OpExecError("Disk %s is degraded on target node,"
5519 " aborting failover." % dev.iv_name)
5521 feedback_fn("* not checking disk consistency as instance is not running")
5523 feedback_fn("* shutting down instance on source node")
5524 logging.info("Shutting down instance %s on node %s",
5525 instance.name, source_node)
5527 result = self.rpc.call_instance_shutdown(source_node, instance,
5528 self.op.shutdown_timeout)
5529 msg = result.fail_msg
5531 if self.op.ignore_consistency:
5532 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533 " Proceeding anyway. Please make sure node"
5534 " %s is down. Error details: %s",
5535 instance.name, source_node, source_node, msg)
5537 raise errors.OpExecError("Could not shutdown instance %s on"
5539 (instance.name, source_node, msg))
5541 feedback_fn("* deactivating the instance's disks on source node")
5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543 raise errors.OpExecError("Can't shut down the instance's disks.")
5545 instance.primary_node = target_node
5546 # distribute new instance config to the other nodes
5547 self.cfg.Update(instance, feedback_fn)
5549 # Only start the instance if it's marked as up
5550 if instance.admin_up:
5551 feedback_fn("* activating the instance's disks on target node")
5552 logging.info("Starting instance %s on node %s",
5553 instance.name, target_node)
5555 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556 ignore_secondaries=True)
5558 _ShutdownInstanceDisks(self, instance)
5559 raise errors.OpExecError("Can't activate the instance's disks")
5561 feedback_fn("* starting the instance on the target node")
5562 result = self.rpc.call_instance_start(target_node, instance, None, None)
5563 msg = result.fail_msg
5565 _ShutdownInstanceDisks(self, instance)
5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567 (instance.name, target_node, msg))
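# Illustrative sketch, not part of the module: the failover above is, in
# order:
#
#   1. if the instance is marked up, check disk consistency on the secondary
#      (unless ignore_consistency is set)
#   2. shut the instance down on the old primary and deactivate its disks
#   3. flip instance.primary_node in the configuration and distribute it
#   4. if the instance is marked up, assemble the disks on the new primary
#      and start it there
#
# A failure after step 3 leaves the instance assigned to the new primary,
# which is why the later error paths only shut the disks down instead of
# trying to revert the configuration.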
5570 class LUMigrateInstance(LogicalUnit):
5571 """Migrate an instance.
5573 This is migration without shutting down, compared to the failover,
5574 which is done with shutdown.
5577 HPATH = "instance-migrate"
5578 HTYPE = constants.HTYPE_INSTANCE
5583 ("cleanup", False, _TBool),
5588 def ExpandNames(self):
5589 self._ExpandAndLockInstance()
5591 self.needed_locks[locking.LEVEL_NODE] = []
5592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5594 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5596 self.tasklets = [self._migrater]
5598 def DeclareLocks(self, level):
5599 if level == locking.LEVEL_NODE:
5600 self._LockInstancesNodes()
5602 def BuildHooksEnv(self):
5605 This runs on master, primary and secondary nodes of the instance.
5608 instance = self._migrater.instance
5609 source_node = instance.primary_node
5610 target_node = instance.secondary_nodes[0]
5611 env = _BuildInstanceHookEnvByObject(self, instance)
5612 env["MIGRATE_LIVE"] = self._migrater.live
5613 env["MIGRATE_CLEANUP"] = self.op.cleanup
5615 "OLD_PRIMARY": source_node,
5616 "OLD_SECONDARY": target_node,
5617 "NEW_PRIMARY": target_node,
5618 "NEW_SECONDARY": source_node,
5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5622 nl_post.append(source_node)
5623 return env, nl, nl_post
5626 class LUMoveInstance(LogicalUnit):
5627 """Move an instance by data-copying.
5630 HPATH = "instance-move"
5631 HTYPE = constants.HTYPE_INSTANCE
5634 ("target_node", _NoDefault, _TNonEmptyString),
5639 def ExpandNames(self):
5640 self._ExpandAndLockInstance()
5641 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5642 self.op.target_node = target_node
5643 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5646 def DeclareLocks(self, level):
5647 if level == locking.LEVEL_NODE:
5648 self._LockInstancesNodes(primary_only=True)
5650 def BuildHooksEnv(self):
5653 This runs on master, primary and secondary nodes of the instance.
5657 "TARGET_NODE": self.op.target_node,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662 self.op.target_node]
5665 def CheckPrereq(self):
5666 """Check prerequisites.
5668 This checks that the instance is in the cluster.
5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert self.instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5675 node = self.cfg.GetNodeInfo(self.op.target_node)
5676 assert node is not None, \
5677 "Cannot retrieve locked node %s" % self.op.target_node
5679 self.target_node = target_node = node.name
5681 if target_node == instance.primary_node:
5682 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683 (instance.name, target_node),
5686 bep = self.cfg.GetClusterInfo().FillBE(instance)
5688 for idx, dsk in enumerate(instance.disks):
5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691 " cannot copy" % idx, errors.ECODE_STATE)
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5696 if instance.admin_up:
5697 # check memory requirements on the secondary node
5698 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699 instance.name, bep[constants.BE_MEMORY],
5700 instance.hypervisor)
5702 self.LogInfo("Not checking memory on the secondary node as"
5703 " instance will not be started")
5705 # check bridge existence
5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5708 def Exec(self, feedback_fn):
5709 """Move an instance.
5711 The move is done by shutting it down on its present node, copying
5712 the data over (slow) and starting it on the new node.
5715 instance = self.instance
5717 source_node = instance.primary_node
5718 target_node = self.target_node
5720 self.LogInfo("Shutting down instance %s on source node %s",
5721 instance.name, source_node)
5723 result = self.rpc.call_instance_shutdown(source_node, instance,
5724 self.op.shutdown_timeout)
5725 msg = result.fail_msg
5727 if self.op.ignore_consistency:
5728 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729 " Proceeding anyway. Please make sure node"
5730 " %s is down. Error details: %s",
5731 instance.name, source_node, source_node, msg)
5733 raise errors.OpExecError("Could not shutdown instance %s on"
5735 (instance.name, source_node, msg))
5737 # create the target disks
5739 _CreateDisks(self, instance, target_node=target_node)
5740 except errors.OpExecError:
5741 self.LogWarning("Device creation failed, reverting...")
5743 _RemoveDisks(self, instance, target_node=target_node)
5745 self.cfg.ReleaseDRBDMinors(instance.name)
5748 cluster_name = self.cfg.GetClusterInfo().cluster_name
5751 # activate, get path, copy the data over
5752 for idx, disk in enumerate(instance.disks):
5753 self.LogInfo("Copying data for disk %d", idx)
5754 result = self.rpc.call_blockdev_assemble(target_node, disk,
5755 instance.name, True)
5757 self.LogWarning("Can't assemble newly created disk %d: %s",
5758 idx, result.fail_msg)
5759 errs.append(result.fail_msg)
5761 dev_path = result.payload
5762 result = self.rpc.call_blockdev_export(source_node, disk,
5763 target_node, dev_path,
5766 self.LogWarning("Can't copy data over for disk %d: %s",
5767 idx, result.fail_msg)
5768 errs.append(result.fail_msg)
5772 self.LogWarning("Some disks failed to copy, aborting")
5774 _RemoveDisks(self, instance, target_node=target_node)
5776 self.cfg.ReleaseDRBDMinors(instance.name)
5777 raise errors.OpExecError("Errors during disk copy: %s" %
5780 instance.primary_node = target_node
5781 self.cfg.Update(instance, feedback_fn)
5783 self.LogInfo("Removing the disks on the original node")
5784 _RemoveDisks(self, instance, target_node=source_node)
5786 # Only start the instance if it's marked as up
5787 if instance.admin_up:
5788 self.LogInfo("Starting instance %s on node %s",
5789 instance.name, target_node)
5791 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792 ignore_secondaries=True)
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Can't activate the instance's disks")
5797 result = self.rpc.call_instance_start(target_node, instance, None, None)
5798 msg = result.fail_msg
5800 _ShutdownInstanceDisks(self, instance)
5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802 (instance.name, target_node, msg))
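# Illustrative sketch, not part of the module: the per-disk copy loop above
# collects failures in "errs" instead of aborting on the first one, so every
# disk is attempted; only afterwards are the half-created target disks
# removed and the reserved DRBD minors released, roughly:
#
#   if errs:
#     self.LogWarning("Some disks failed to copy, aborting")
#     _RemoveDisks(self, instance, target_node=target_node)
#     self.cfg.ReleaseDRBDMinors(instance.name)
#     raise errors.OpExecError("Errors during disk copy: %s" % ",".join(errs))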
5805 class LUMigrateNode(LogicalUnit):
5806 """Migrate all instances from a node.
5809 HPATH = "node-migrate"
5810 HTYPE = constants.HTYPE_NODE
5818 def ExpandNames(self):
5819 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5821 self.needed_locks = {
5822 locking.LEVEL_NODE: [self.op.node_name],
5825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5827 # Create tasklets for migrating instances for all instances on this node
5831 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5832 logging.debug("Migrating instance %s", inst.name)
5833 names.append(inst.name)
5835 tasklets.append(TLMigrateInstance(self, inst.name, False))
5837 self.tasklets = tasklets
5839 # Declare instance locks
5840 self.needed_locks[locking.LEVEL_INSTANCE] = names
5842 def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE:
5844 self._LockInstancesNodes()
5846 def BuildHooksEnv(self):
5849 This runs on the master, the primary and all the secondaries.
5853 "NODE_NAME": self.op.node_name,
5856 nl = [self.cfg.GetMasterNode()]
5858 return (env, nl, nl)
5861 class TLMigrateInstance(Tasklet):
5862 """Tasklet class for instance migration.
5865 @ivar live: whether the migration will be done live or non-live;
5866 this variable is initialized only after CheckPrereq has run
5869 def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class.
5873 Tasklet.__init__(self, lu)
5876 self.instance_name = instance_name
5877 self.cleanup = cleanup
5878 self.live = False # will be overridden later
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This checks that the instance is in the cluster.
5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887 instance = self.cfg.GetInstanceInfo(instance_name)
5888 assert instance is not None
5890 if instance.disk_template != constants.DT_DRBD8:
5891 raise errors.OpPrereqError("Instance's disk layout is not"
5892 " drbd8, cannot migrate.", errors.ECODE_STATE)
5894 secondary_nodes = instance.secondary_nodes
5895 if not secondary_nodes:
5896 raise errors.ConfigurationError("No secondary node but using"
5897 " drbd8 disk template")
5899 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5901 target_node = secondary_nodes[0]
5902 # check memory requirements on the secondary node
5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904 instance.name, i_be[constants.BE_MEMORY],
5905 instance.hypervisor)
5907 # check bridge existence
5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5910 if not self.cleanup:
5911 _CheckNodeNotDrained(self.lu, target_node)
5912 result = self.rpc.call_instance_migratable(instance.primary_node,
5914 result.Raise("Can't migrate, please use failover",
5915 prereq=True, ecode=errors.ECODE_STATE)
5917 self.instance = instance
5919 if self.lu.op.live is not None and self.lu.op.mode is not None:
5920 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921 " parameters are accepted",
5923 if self.lu.op.live is not None:
5925 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5927 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928 # reset the 'live' parameter to None so that repeated
5929 # invocations of CheckPrereq do not raise an exception
5930 self.lu.op.live = None
5931 elif self.lu.op.mode is None:
5932 # read the default value from the hypervisor
5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
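# Illustrative only (not part of the tasklet): the precedence implemented
# above can be read as a small standalone helper -- an explicit 'live' flag
# wins over 'mode', which wins over the hypervisor default.  All names below
# are hypothetical:
#
#   def resolve_migration_mode(live, mode, hv_default):
#     if live is not None and mode is not None:
#       raise ValueError("only one of 'live' and 'mode' may be given")
#     if live is not None:
#       if live:
#         return "live"
#       return "non-live"
#     if mode is not None:
#       return mode
#     return hv_default
#
#   resolve_migration_mode(True, None, "non-live")   # -> 'live'
#   resolve_migration_mode(None, None, "non-live")   # -> 'non-live'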
5938 def _WaitUntilSync(self):
5939 """Poll with custom rpc for disk sync.
5941 This uses our own step-based rpc call.
5944 self.feedback_fn("* wait until resync is done")
5948 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5950 self.instance.disks)
5952 for node, nres in result.items():
5953 nres.Raise("Cannot resync disks on node %s" % node)
5954 node_done, node_percent = nres.payload
5955 all_done = all_done and node_done
5956 if node_percent is not None:
5957 min_percent = min(min_percent, node_percent)
5959 if min_percent < 100:
5960 self.feedback_fn(" - progress: %.1f%%" % min_percent)
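# Sketch (hypothetical helper, not used by the code above): the polling loop
# reduces the per-node replies to "all nodes done" plus the smallest reported
# completion percentage, which is what gets shown as progress:
#
#   def summarize_sync(results):
#     """results: iterable of (done, percent_or_None), one entry per node."""
#     all_done = True
#     min_percent = 100
#     for done, percent in results:
#       all_done = all_done and done
#       if percent is not None:
#         min_percent = min(min_percent, percent)
#     return all_done, min_percent
#
#   summarize_sync([(False, 42.5), (True, None)])  # -> (False, 42.5)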
5963 def _EnsureSecondary(self, node):
5964 """Demote a node to secondary.
5967 self.feedback_fn("* switching node %s to secondary mode" % node)
5969 for dev in self.instance.disks:
5970 self.cfg.SetDiskID(dev, node)
5972 result = self.rpc.call_blockdev_close(node, self.instance.name,
5973 self.instance.disks)
5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5976 def _GoStandalone(self):
5977 """Disconnect from the network.
5980 self.feedback_fn("* changing into standalone mode")
5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982 self.instance.disks)
5983 for node, nres in result.items():
5984 nres.Raise("Cannot disconnect disks on node %s" % node)
5986 def _GoReconnect(self, multimaster):
5987 """Reconnect to the network.
5993 msg = "single-master"
5994 self.feedback_fn("* changing disks into %s mode" % msg)
5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996 self.instance.disks,
5997 self.instance.name, multimaster)
5998 for node, nres in result.items():
5999 nres.Raise("Cannot change disks config on node %s" % node)
6001 def _ExecCleanup(self):
6002 """Try to cleanup after a failed migration.
6004 The cleanup is done by:
6005 - check that the instance is running only on one node
6006 (and update the config if needed)
6007 - change disks on its secondary node to secondary
6008 - wait until disks are fully synchronized
6009 - disconnect from the network
6010 - change disks into single-master mode
6011 - wait again until disks are fully synchronized
6014 instance = self.instance
6015 target_node = self.target_node
6016 source_node = self.source_node
6018 # check running on only one node
6019 self.feedback_fn("* checking where the instance actually runs"
6020 " (if this hangs, the hypervisor might be in"
6022 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023 for node, result in ins_l.items():
6024 result.Raise("Can't contact node %s" % node)
6026 runningon_source = instance.name in ins_l[source_node].payload
6027 runningon_target = instance.name in ins_l[target_node].payload
6029 if runningon_source and runningon_target:
6030 raise errors.OpExecError("Instance seems to be running on two nodes,"
6031 " or the hypervisor is confused. You will have"
6032 " to ensure manually that it runs only on one"
6033 " and restart this operation.")
6035 if not (runningon_source or runningon_target):
6036 raise errors.OpExecError("Instance does not seem to be running at all."
6037 " In this case, it's safer to repair by"
6038 " running 'gnt-instance stop' to ensure disk"
6039 " shutdown, and then restarting it.")
6041 if runningon_target:
6042 # the migration has actually succeeded, we need to update the config
6043 self.feedback_fn("* instance running on secondary node (%s),"
6044 " updating config" % target_node)
6045 instance.primary_node = target_node
6046 self.cfg.Update(instance, self.feedback_fn)
6047 demoted_node = source_node
6049 self.feedback_fn("* instance confirmed to be running on its"
6050 " primary node (%s)" % source_node)
6051 demoted_node = target_node
6053 self._EnsureSecondary(demoted_node)
6055 self._WaitUntilSync()
6056 except errors.OpExecError:
6057 # we ignore errors here, since if the device is standalone, it
6058 # won't be able to sync
6060 self._GoStandalone()
6061 self._GoReconnect(False)
6062 self._WaitUntilSync()
6064 self.feedback_fn("* done")
6066 def _RevertDiskStatus(self):
6067 """Try to revert the disk status after a failed migration.
6070 target_node = self.target_node
6072 self._EnsureSecondary(target_node)
6073 self._GoStandalone()
6074 self._GoReconnect(False)
6075 self._WaitUntilSync()
6076 except errors.OpExecError, err:
6077 self.lu.LogWarning("Migration failed and I can't reconnect the"
6078 " drives: error '%s'\n"
6079 "Please look and recover the instance status" %
6082 def _AbortMigration(self):
6083 """Call the hypervisor code to abort a started migration.
6086 instance = self.instance
6087 target_node = self.target_node
6088 migration_info = self.migration_info
6090 abort_result = self.rpc.call_finalize_migration(target_node,
6094 abort_msg = abort_result.fail_msg
6096 logging.error("Aborting migration failed on target node %s: %s",
6097 target_node, abort_msg)
6098 # Don't raise an exception here, as we still have to try to revert the
6099 # disk status, even if this step failed.
6101 def _ExecMigration(self):
6102 """Migrate an instance.
6104 The migration is done by:
6105 - change the disks into dual-master mode
6106 - wait until disks are fully synchronized again
6107 - migrate the instance
6108 - change disks on the new secondary node (the old primary) to secondary
6109 - wait until disks are fully synchronized
6110 - change disks into single-master mode
6113 instance = self.instance
6114 target_node = self.target_node
6115 source_node = self.source_node
6117 self.feedback_fn("* checking disk consistency between source and target")
6118 for dev in instance.disks:
6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120 raise errors.OpExecError("Disk %s is degraded or not fully"
6121 " synchronized on target node,"
6122 " aborting migrate." % dev.iv_name)
6124 # First get the migration information from the remote node
6125 result = self.rpc.call_migration_info(source_node, instance)
6126 msg = result.fail_msg
6128 log_err = ("Failed fetching source migration information from %s: %s" %
6130 logging.error(log_err)
6131 raise errors.OpExecError(log_err)
6133 self.migration_info = migration_info = result.payload
6135 # Then switch the disks to master/master mode
6136 self._EnsureSecondary(target_node)
6137 self._GoStandalone()
6138 self._GoReconnect(True)
6139 self._WaitUntilSync()
6141 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142 result = self.rpc.call_accept_instance(target_node,
6145 self.nodes_ip[target_node])
6147 msg = result.fail_msg
6149 logging.error("Instance pre-migration failed, trying to revert"
6150 " disk status: %s", msg)
6151 self.feedback_fn("Pre-migration failed, aborting")
6152 self._AbortMigration()
6153 self._RevertDiskStatus()
6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155 (instance.name, msg))
6157 self.feedback_fn("* migrating instance to %s" % target_node)
6159 result = self.rpc.call_instance_migrate(source_node, instance,
6160 self.nodes_ip[target_node],
6162 msg = result.fail_msg
6164 logging.error("Instance migration failed, trying to revert"
6165 " disk status: %s", msg)
6166 self.feedback_fn("Migration failed, aborting")
6167 self._AbortMigration()
6168 self._RevertDiskStatus()
6169 raise errors.OpExecError("Could not migrate instance %s: %s" %
6170 (instance.name, msg))
6173 instance.primary_node = target_node
6174 # distribute new instance config to the other nodes
6175 self.cfg.Update(instance, self.feedback_fn)
6177 result = self.rpc.call_finalize_migration(target_node,
6181 msg = result.fail_msg
6183 logging.error("Instance migration succeeded, but finalization failed:"
6185 raise errors.OpExecError("Could not finalize instance migration: %s" %
6188 self._EnsureSecondary(source_node)
6189 self._WaitUntilSync()
6190 self._GoStandalone()
6191 self._GoReconnect(False)
6192 self._WaitUntilSync()
6194 self.feedback_fn("* done")
6196 def Exec(self, feedback_fn):
6197 """Perform the migration.
6200 feedback_fn("Migrating instance %s" % self.instance.name)
6202 self.feedback_fn = feedback_fn
6204 self.source_node = self.instance.primary_node
6205 self.target_node = self.instance.secondary_nodes[0]
6206 self.all_nodes = [self.source_node, self.target_node]
6208 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6213 return self._ExecCleanup()
6215 return self._ExecMigration()
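# The step lists documented in TLMigrateInstance._ExecMigration and
# _ExecCleanup above reorder the same DRBD primitives (demote to secondary,
# go standalone, reconnect, wait for sync).  Below is a minimal sketch of the
# migration-time ordering with the primitives passed in as plain callables,
# so it stays independent of the RPC layer; every name here is illustrative
# only and not part of the real code.
def _SketchMigrationDiskSequence(ensure_secondary, go_standalone, go_reconnect,
                                 wait_until_sync, migrate_instance,
                                 source_node, target_node):
  """Order the DRBD state transitions around a live migration (sketch)."""
  # switch the disks to dual-master so both nodes may access them
  ensure_secondary(target_node)
  go_standalone()
  go_reconnect(True)
  wait_until_sync()
  # the hypervisor-level migration happens while disks are dual-master
  migrate_instance()
  # demote the old primary and return to single-master mode
  ensure_secondary(source_node)
  wait_until_sync()
  go_standalone()
  go_reconnect(False)
  wait_until_sync()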
6218 def _CreateBlockDev(lu, node, instance, device, force_create,
6220 """Create a tree of block devices on a given node.
6222 If this device type has to be created on secondaries, create it and
6225 If not, just recurse to children keeping the same 'force' value.
6227 @param lu: the lu on whose behalf we execute
6228 @param node: the node on which to create the device
6229 @type instance: L{objects.Instance}
6230 @param instance: the instance which owns the device
6231 @type device: L{objects.Disk}
6232 @param device: the device to create
6233 @type force_create: boolean
6234 @param force_create: whether to force creation of this device; this
6235 will be changed to True whenever we find a device which has
6236 CreateOnSecondary() attribute
6237 @param info: the extra 'metadata' we should attach to the device
6238 (this will be represented as a LVM tag)
6239 @type force_open: boolean
6240 @param force_open: this parameter will be passed to the
6241 L{backend.BlockdevCreate} function where it specifies
6242 whether we run on primary or not, and it affects both
6243 the child assembly and the device's own Open() execution
6246 if device.CreateOnSecondary():
6250 for child in device.children:
6251 _CreateBlockDev(lu, node, instance, child, force_create,
6254 if not force_create:
6257 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
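# A compact sketch of the recursion rule used by _CreateBlockDev above:
# force_create is promoted to True as soon as a device reports
# CreateOnSecondary(), children are always visited first, and the device
# itself is only created when the (possibly promoted) flag is set.  'dev' is
# any object with a CreateOnSecondary() method and an optional 'children'
# list; 'create' stands in for _CreateSingleBlockDev.  Illustration only.
def _SketchCreateTree(dev, create, force_create=False):
  """Walk a device tree bottom-up, honouring the force-promotion rule."""
  if dev.CreateOnSecondary():
    force_create = True
  for child in getattr(dev, "children", None) or []:
    _SketchCreateTree(child, create, force_create)
  if force_create:
    create(dev)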
6260 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261 """Create a single block device on a given node.
6263 This will not recurse over children of the device, so they must be
6266 @param lu: the lu on whose behalf we execute
6267 @param node: the node on which to create the device
6268 @type instance: L{objects.Instance}
6269 @param instance: the instance which owns the device
6270 @type device: L{objects.Disk}
6271 @param device: the device to create
6272 @param info: the extra 'metadata' we should attach to the device
6273 (this will be represented as a LVM tag)
6274 @type force_open: boolean
6275 @param force_open: this parameter will be passed to the
6276 L{backend.BlockdevCreate} function where it specifies
6277 whether we run on primary or not, and it affects both
6278 the child assembly and the device's own Open() execution
6281 lu.cfg.SetDiskID(device, node)
6282 result = lu.rpc.call_blockdev_create(node, device, device.size,
6283 instance.name, force_open, info)
6284 result.Raise("Can't create block device %s on"
6285 " node %s for instance %s" % (device, node, instance.name))
6286 if device.physical_id is None:
6287 device.physical_id = result.payload
6290 def _GenerateUniqueNames(lu, exts):
6291 """Generate a suitable LV name.
6293 This will generate a logical volume name for the given instance.
6298 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299 results.append("%s%s" % (new_id, val))
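# _GenerateUniqueNames above pairs one cluster-unique ID with every requested
# suffix.  Outside the config layer the same idea can be sketched with the
# standard uuid module (hypothetical helper, illustration only):
import uuid

def _SketchUniqueNames(suffixes):
  """Return one '<unique id><suffix>' name per requested suffix."""
  return ["%s%s" % (uuid.uuid4(), suffix) for suffix in suffixes]

# _SketchUniqueNames([".disk0", ".disk1"]) would return something like
#   ['2f1c...-....disk0', '8ab0...-....disk1'] (a fresh ID per suffix)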
6303 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6305 """Generate a drbd8 device complete with its children.
6308 port = lu.cfg.AllocatePort()
6309 vgname = lu.cfg.GetVGName()
6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312 logical_id=(vgname, names[0]))
6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314 logical_id=(vgname, names[1]))
6315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316 logical_id=(primary, secondary, port,
6319 children=[dev_data, dev_meta],
6324 def _GenerateDiskTemplate(lu, template_name,
6325 instance_name, primary_node,
6326 secondary_nodes, disk_info,
6327 file_storage_dir, file_driver,
6329 """Generate the entire disk layout for a given template type.
6332 # TODO: compute space requirements
6334 vgname = lu.cfg.GetVGName()
6335 disk_count = len(disk_info)
6337 if template_name == constants.DT_DISKLESS:
6339 elif template_name == constants.DT_PLAIN:
6340 if len(secondary_nodes) != 0:
6341 raise errors.ProgrammerError("Wrong template configuration")
6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344 for i in range(disk_count)])
6345 for idx, disk in enumerate(disk_info):
6346 disk_index = idx + base_index
6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348 logical_id=(vgname, names[idx]),
6349 iv_name="disk/%d" % disk_index,
6351 disks.append(disk_dev)
6352 elif template_name == constants.DT_DRBD8:
6353 if len(secondary_nodes) != 1:
6354 raise errors.ProgrammerError("Wrong template configuration")
6355 remote_node = secondary_nodes[0]
6356 minors = lu.cfg.AllocateDRBDMinor(
6357 [primary_node, remote_node] * len(disk_info), instance_name)
6360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361 for i in range(disk_count)]):
6362 names.append(lv_prefix + "_data")
6363 names.append(lv_prefix + "_meta")
6364 for idx, disk in enumerate(disk_info):
6365 disk_index = idx + base_index
6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367 disk["size"], names[idx*2:idx*2+2],
6368 "disk/%d" % disk_index,
6369 minors[idx*2], minors[idx*2+1])
6370 disk_dev.mode = disk["mode"]
6371 disks.append(disk_dev)
6372 elif template_name == constants.DT_FILE:
6373 if len(secondary_nodes) != 0:
6374 raise errors.ProgrammerError("Wrong template configuration")
6376 _RequireFileStorage()
6378 for idx, disk in enumerate(disk_info):
6379 disk_index = idx + base_index
6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381 iv_name="disk/%d" % disk_index,
6382 logical_id=(file_driver,
6383 "%s/disk%d" % (file_storage_dir,
6386 disks.append(disk_dev)
6388 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
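# The naming scheme above is easiest to see side by side: a plain template
# gets exactly one LV name per disk, while drbd8 expands every generated
# prefix into a data/meta pair.  Sketch (names and helper are illustrative
# only):
def _SketchLvNamesForTemplate(template, prefixes):
  """Return the flat list of LV names for the 'plain' or 'drbd8' layouts."""
  if template == "plain":
    return list(prefixes)
  if template == "drbd8":
    names = []
    for prefix in prefixes:
      names.append(prefix + "_data")
      names.append(prefix + "_meta")
    return names
  raise ValueError("template %r not covered by this sketch" % template)

# _SketchLvNamesForTemplate("drbd8", ["abc.disk0"]) ->
#   ['abc.disk0_data', 'abc.disk0_meta']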
6392 def _GetInstanceInfoText(instance):
6393 """Compute the text that should be added to the disk's metadata.
6396 return "originstname+%s" % instance.name
6399 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance.
6402 This abstracts away some work from AddInstance.
6404 @type lu: L{LogicalUnit}
6405 @param lu: the logical unit on whose behalf we execute
6406 @type instance: L{objects.Instance}
6407 @param instance: the instance whose disks we should create
6409 @param to_skip: list of indices to skip
6410 @type target_node: string
6411 @param target_node: if passed, overrides the target node for creation
6413 @return: the success of the creation
6416 info = _GetInstanceInfoText(instance)
6417 if target_node is None:
6418 pnode = instance.primary_node
6419 all_nodes = instance.all_nodes
6424 if instance.disk_template == constants.DT_FILE:
6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6428 result.Raise("Failed to create directory '%s' on"
6429 " node %s" % (file_storage_dir, pnode))
6431 # Note: this needs to be kept in sync with adding of disks in
6432 # LUSetInstanceParams
6433 for idx, device in enumerate(instance.disks):
6434 if to_skip and idx in to_skip:
6436 logging.info("Creating volume %s for instance %s",
6437 device.iv_name, instance.name)
6439 for node in all_nodes:
6440 f_create = node == pnode
6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6444 def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance.
6447 This abstracts away some work from `AddInstance()` and
6448 `RemoveInstance()`. Note that in case some of the devices couldn't
6449 be removed, the removal will continue with the other ones (compare
6450 with `_CreateDisks()`).
6452 @type lu: L{LogicalUnit}
6453 @param lu: the logical unit on whose behalf we execute
6454 @type instance: L{objects.Instance}
6455 @param instance: the instance whose disks we should remove
6456 @type target_node: string
6457 @param target_node: used to override the node on which to remove the disks
6459 @return: the success of the removal
6462 logging.info("Removing block devices for instance %s", instance.name)
6465 for device in instance.disks:
6467 edata = [(target_node, device)]
6469 edata = device.ComputeNodeTree(instance.primary_node)
6470 for node, disk in edata:
6471 lu.cfg.SetDiskID(disk, node)
6472 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6474 lu.LogWarning("Could not remove block device %s on node %s,"
6475 " continuing anyway: %s", device.iv_name, node, msg)
6478 if instance.disk_template == constants.DT_FILE:
6479 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6483 tgt = instance.primary_node
6484 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6486 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487 file_storage_dir, instance.primary_node, result.fail_msg)
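# _RemoveDisks above is deliberately best-effort: every failure is only
# logged and removal continues, so one stuck device does not leave the
# remaining ones behind.  The core pattern, with 'remove_fn' standing in for
# the blockdev-removal RPC (illustration only):
def _SketchBestEffortRemove(devices, remove_fn, log_warning):
  """Try to remove every device; report overall success, never bail early."""
  all_ok = True
  for dev in devices:
    err = remove_fn(dev)  # returns an error message, or None on success
    if err:
      log_warning("could not remove %s, continuing anyway: %s" % (dev, err))
      all_ok = False
  return all_ok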
6493 def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group
6497 # Required free disk space as a function of disk and swap space
6499 constants.DT_DISKLESS: None,
6500 constants.DT_PLAIN: sum(d["size"] for d in disks),
6501 # 128 MB are added for drbd metadata for each disk
6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503 constants.DT_FILE: None,
6506 if disk_template not in req_size_dict:
6507 raise errors.ProgrammerError("Disk template '%s' size requirement"
6508 " is unknown" % disk_template)
6510 return req_size_dict[disk_template]
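# Worked example for the table above: two drbd8 disks of 10240 and 2048 MiB
# need 10240 + 128 + 2048 + 128 = 12544 MiB in the volume group, the same
# disks as 'plain' LVs need 12288 MiB, and diskless/file-based instances
# need no VG space at all:
#
#   _ComputeDiskSize(constants.DT_DRBD8,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12544
#   _ComputeDiskSize(constants.DT_PLAIN,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12288
#   _ComputeDiskSize(constants.DT_FILE, [{"size": 10240}])  # -> None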
6513 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation.
6516 This function abstracts the hypervisor parameter validation to be
6517 used in both instance create and instance modify.
6519 @type lu: L{LogicalUnit}
6520 @param lu: the logical unit for which we check
6521 @type nodenames: list
6522 @param nodenames: the list of nodes on which we should check
6523 @type hvname: string
6524 @param hvname: the name of the hypervisor we should use
6525 @type hvparams: dict
6526 @param hvparams: the parameters which we need to check
6527 @raise errors.OpPrereqError: if the parameters are not valid
6530 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6533 for node in nodenames:
6537 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6540 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation.
6543 @type lu: L{LogicalUnit}
6544 @param lu: the logical unit for which we check
6545 @type required: boolean
6546 @param required: whether the validation should fail if the OS is not
6548 @type nodenames: list
6549 @param nodenames: the list of nodes on which we should check
6550 @type osname: string
6551 @param osname: the name of the OS we should use
6552 @type osparams: dict
6553 @param osparams: the parameters which we need to check
6554 @raise errors.OpPrereqError: if the parameters are not valid
6557 result = lu.rpc.call_os_validate(required, nodenames, osname,
6558 [constants.OS_VALIDATE_PARAMETERS],
6560 for node, nres in result.items():
6561 # we don't check for offline cases since this should be run only
6562 # against the master node and/or an instance's nodes
6563 nres.Raise("OS Parameters validation failed on node %s" % node)
6564 if not nres.payload:
6565 lu.LogInfo("OS %s not found on node %s, validation skipped",
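# The loop above implements a "validate where possible" policy: a node that
# does not even have the OS only produces an informational message, while a
# node that has it but rejects the parameters fails the whole check.  As a
# sketch, with 'check_fn' returning (os_found, error_or_None) per node (all
# names are illustrative only):
def _SketchValidateOsOnNodes(nodes, check_fn, log_info):
  """Raise on real validation errors, merely log nodes missing the OS."""
  for node in nodes:
    (found, err) = check_fn(node)
    if err:
      raise RuntimeError("OS parameter validation failed on node %s: %s" %
                         (node, err))
    if not found:
      log_info("OS not found on node %s, validation skipped" % node)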
6569 class LUCreateInstance(LogicalUnit):
6570 """Create an instance.
6573 HPATH = "instance-add"
6574 HTYPE = constants.HTYPE_INSTANCE
6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578 ("start", True, _TBool),
6579 ("wait_for_sync", True, _TBool),
6580 ("ip_check", True, _TBool),
6581 ("name_check", True, _TBool),
6582 ("disks", _NoDefault, _TListOf(_TDict)),
6583 ("nics", _NoDefault, _TListOf(_TDict)),
6584 ("hvparams", _EmptyDict, _TDict),
6585 ("beparams", _EmptyDict, _TDict),
6586 ("osparams", _EmptyDict, _TDict),
6587 ("no_install", None, _TMaybeBool),
6588 ("os_type", None, _TMaybeString),
6589 ("force_variant", False, _TBool),
6590 ("source_handshake", None, _TOr(_TList, _TNone)),
6591 ("source_x509_ca", None, _TMaybeString),
6592 ("source_instance_name", None, _TMaybeString),
6593 ("src_node", None, _TMaybeString),
6594 ("src_path", None, _TMaybeString),
6595 ("pnode", None, _TMaybeString),
6596 ("snode", None, _TMaybeString),
6597 ("iallocator", None, _TMaybeString),
6598 ("hypervisor", None, _TMaybeString),
6599 ("disk_template", _NoDefault, _CheckDiskTemplate),
6600 ("identify_defaults", False, _TBool),
6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602 ("file_storage_dir", None, _TMaybeString),
6606 def CheckArguments(self):
6610 # do not require name_check to ease forward/backward compatibility
6612 if self.op.no_install and self.op.start:
6613 self.LogInfo("No-installation mode selected, disabling startup")
6614 self.op.start = False
6615 # validate/normalize the instance name
6616 self.op.instance_name = \
6617 netutils.HostInfo.NormalizeName(self.op.instance_name)
6619 if self.op.ip_check and not self.op.name_check:
6620 # TODO: make the ip check more flexible and not depend on the name check
6621 raise errors.OpPrereqError("Cannot do ip check without a name check",
6624 # check nics' parameter names
6625 for nic in self.op.nics:
6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6628 # check disks. parameter names and consistent adopt/no-adopt strategy
6629 has_adopt = has_no_adopt = False
6630 for disk in self.op.disks:
6631 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6636 if has_adopt and has_no_adopt:
6637 raise errors.OpPrereqError("Either all disks are adopted or none is",
6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641 raise errors.OpPrereqError("Disk adoption is not supported for the"
6642 " '%s' disk template" %
6643 self.op.disk_template,
6645 if self.op.iallocator is not None:
6646 raise errors.OpPrereqError("Disk adoption not allowed with an"
6647 " iallocator script", errors.ECODE_INVAL)
6648 if self.op.mode == constants.INSTANCE_IMPORT:
6649 raise errors.OpPrereqError("Disk adoption not allowed for"
6650 " instance import", errors.ECODE_INVAL)
6652 self.adopt_disks = has_adopt
6654 # instance name verification
6655 if self.op.name_check:
6656 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6657 self.op.instance_name = self.hostname1.name
6658 # used in CheckPrereq for ip ping check
6659 self.check_ip = self.hostname1.ip
6661 self.check_ip = None
6663 # file storage checks
6664 if (self.op.file_driver and
6665 not self.op.file_driver in constants.FILE_DRIVER):
6666 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6667 self.op.file_driver, errors.ECODE_INVAL)
6669 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6670 raise errors.OpPrereqError("File storage directory path not absolute",
6673 ### Node/iallocator related checks
6674 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6676 if self.op.pnode is not None:
6677 if self.op.disk_template in constants.DTS_NET_MIRROR:
6678 if self.op.snode is None:
6679 raise errors.OpPrereqError("The networked disk templates need"
6680 " a mirror node", errors.ECODE_INVAL)
6682 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6684 self.op.snode = None
6686 self._cds = _GetClusterDomainSecret()
6688 if self.op.mode == constants.INSTANCE_IMPORT:
6689 # On import force_variant must be True, because if we forced it at
6690 # initial install, our only chance when importing it back is that it
6692 self.op.force_variant = True
6694 if self.op.no_install:
6695 self.LogInfo("No-installation mode has no effect during import")
6697 elif self.op.mode == constants.INSTANCE_CREATE:
6698 if self.op.os_type is None:
6699 raise errors.OpPrereqError("No guest OS specified",
6701 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6702 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6703 " installation" % self.op.os_type,
6705 if self.op.disk_template is None:
6706 raise errors.OpPrereqError("No disk template specified",
6709 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6710 # Check handshake to ensure both clusters have the same domain secret
6711 src_handshake = self.op.source_handshake
6712 if not src_handshake:
6713 raise errors.OpPrereqError("Missing source handshake",
6716 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6719 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6722 # Load and check source CA
6723 self.source_x509_ca_pem = self.op.source_x509_ca
6724 if not self.source_x509_ca_pem:
6725 raise errors.OpPrereqError("Missing source X509 CA",
6729 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6731 except OpenSSL.crypto.Error, err:
6732 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6733 (err, ), errors.ECODE_INVAL)
6735 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6736 if errcode is not None:
6737 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6740 self.source_x509_ca = cert
6742 src_instance_name = self.op.source_instance_name
6743 if not src_instance_name:
6744 raise errors.OpPrereqError("Missing source instance name",
6747 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6748 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6751 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6752 self.op.mode, errors.ECODE_INVAL)
6754 def ExpandNames(self):
6755 """ExpandNames for CreateInstance.
6757 Figure out the right locks for instance creation.
6760 self.needed_locks = {}
6762 instance_name = self.op.instance_name
6763 # this is just a preventive check, but someone might still add this
6764 # instance in the meantime, and creation will fail at lock-add time
6765 if instance_name in self.cfg.GetInstanceList():
6766 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6767 instance_name, errors.ECODE_EXISTS)
6769 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6771 if self.op.iallocator:
6772 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6774 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6775 nodelist = [self.op.pnode]
6776 if self.op.snode is not None:
6777 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6778 nodelist.append(self.op.snode)
6779 self.needed_locks[locking.LEVEL_NODE] = nodelist
6781 # in case of import lock the source node too
6782 if self.op.mode == constants.INSTANCE_IMPORT:
6783 src_node = self.op.src_node
6784 src_path = self.op.src_path
6786 if src_path is None:
6787 self.op.src_path = src_path = self.op.instance_name
6789 if src_node is None:
6790 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6791 self.op.src_node = None
6792 if os.path.isabs(src_path):
6793 raise errors.OpPrereqError("Importing an instance from an absolute"
6794 " path requires a source node option.",
6797 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6798 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6799 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6800 if not os.path.isabs(src_path):
6801 self.op.src_path = src_path = \
6802 utils.PathJoin(constants.EXPORT_DIR, src_path)
6804 def _RunAllocator(self):
6805 """Run the allocator based on input opcode.
6808 nics = [n.ToDict() for n in self.nics]
6809 ial = IAllocator(self.cfg, self.rpc,
6810 mode=constants.IALLOCATOR_MODE_ALLOC,
6811 name=self.op.instance_name,
6812 disk_template=self.op.disk_template,
6815 vcpus=self.be_full[constants.BE_VCPUS],
6816 mem_size=self.be_full[constants.BE_MEMORY],
6819 hypervisor=self.op.hypervisor,
6822 ial.Run(self.op.iallocator)
6825 raise errors.OpPrereqError("Can't compute nodes using"
6826 " iallocator '%s': %s" %
6827 (self.op.iallocator, ial.info),
6829 if len(ial.result) != ial.required_nodes:
6830 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6831 " of nodes (%s), required %s" %
6832 (self.op.iallocator, len(ial.result),
6833 ial.required_nodes), errors.ECODE_FAULT)
6834 self.op.pnode = ial.result[0]
6835 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6836 self.op.instance_name, self.op.iallocator,
6837 utils.CommaJoin(ial.result))
6838 if ial.required_nodes == 2:
6839 self.op.snode = ial.result[1]
6841 def BuildHooksEnv(self):
6844 This runs on master, primary and secondary nodes of the instance.
6848 "ADD_MODE": self.op.mode,
6850 if self.op.mode == constants.INSTANCE_IMPORT:
6851 env["SRC_NODE"] = self.op.src_node
6852 env["SRC_PATH"] = self.op.src_path
6853 env["SRC_IMAGES"] = self.src_images
6855 env.update(_BuildInstanceHookEnv(
6856 name=self.op.instance_name,
6857 primary_node=self.op.pnode,
6858 secondary_nodes=self.secondaries,
6859 status=self.op.start,
6860 os_type=self.op.os_type,
6861 memory=self.be_full[constants.BE_MEMORY],
6862 vcpus=self.be_full[constants.BE_VCPUS],
6863 nics=_NICListToTuple(self, self.nics),
6864 disk_template=self.op.disk_template,
6865 disks=[(d["size"], d["mode"]) for d in self.disks],
6868 hypervisor_name=self.op.hypervisor,
6871 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6875 def _ReadExportInfo(self):
6876 """Reads the export information from disk.
6878 It will override the opcode source node and path with the actual
6879 information, if these two were not specified before.
6881 @return: the export information
6884 assert self.op.mode == constants.INSTANCE_IMPORT
6886 src_node = self.op.src_node
6887 src_path = self.op.src_path
6889 if src_node is None:
6890 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6891 exp_list = self.rpc.call_export_list(locked_nodes)
6893 for node in exp_list:
6894 if exp_list[node].fail_msg:
6896 if src_path in exp_list[node].payload:
6898 self.op.src_node = src_node = node
6899 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6903 raise errors.OpPrereqError("No export found for relative path %s" %
6904 src_path, errors.ECODE_INVAL)
6906 _CheckNodeOnline(self, src_node)
6907 result = self.rpc.call_export_info(src_node, src_path)
6908 result.Raise("No export or invalid export found in dir %s" % src_path)
6910 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6911 if not export_info.has_section(constants.INISECT_EXP):
6912 raise errors.ProgrammerError("Corrupted export config",
6913 errors.ECODE_ENVIRON)
6915 ei_version = export_info.get(constants.INISECT_EXP, "version")
6916 if (int(ei_version) != constants.EXPORT_VERSION):
6917 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6918 (ei_version, constants.EXPORT_VERSION),
6919 errors.ECODE_ENVIRON)
6922 def _ReadExportParams(self, einfo):
6923 """Use export parameters as defaults.
6925 In case the opcode doesn't specify (as in override) some instance
6926 parameters, then try to use them from the export information, if
6930 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6932 if self.op.disk_template is None:
6933 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6934 self.op.disk_template = einfo.get(constants.INISECT_INS,
6937 raise errors.OpPrereqError("No disk template specified and the export"
6938 " is missing the disk_template information",
6941 if not self.op.disks:
6942 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6944 # TODO: import the disk iv_name too
6945 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6946 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6947 disks.append({"size": disk_sz})
6948 self.op.disks = disks
6950 raise errors.OpPrereqError("No disk info specified and the export"
6951 " is missing the disk information",
6954 if (not self.op.nics and
6955 einfo.has_option(constants.INISECT_INS, "nic_count")):
6957 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6959 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6960 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6965 if (self.op.hypervisor is None and
6966 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6967 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6968 if einfo.has_section(constants.INISECT_HYP):
6969 # use the export parameters but do not override the ones
6970 # specified by the user
6971 for name, value in einfo.items(constants.INISECT_HYP):
6972 if name not in self.op.hvparams:
6973 self.op.hvparams[name] = value
6975 if einfo.has_section(constants.INISECT_BEP):
6976 # use the parameters, without overriding
6977 for name, value in einfo.items(constants.INISECT_BEP):
6978 if name not in self.op.beparams:
6979 self.op.beparams[name] = value
6981 # try to read the parameters old style, from the main section
6982 for name in constants.BES_PARAMETERS:
6983 if (name not in self.op.beparams and
6984 einfo.has_option(constants.INISECT_INS, name)):
6985 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6987 if einfo.has_section(constants.INISECT_OSP):
6988 # use the parameters, without overriding
6989 for name, value in einfo.items(constants.INISECT_OSP):
6990 if name not in self.op.osparams:
6991 self.op.osparams[name] = value
6993 def _RevertToDefaults(self, cluster):
6994 """Revert the instance parameters to the default values.
6998 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6999 for name in self.op.hvparams.keys():
7000 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7001 del self.op.hvparams[name]
7003 be_defs = cluster.SimpleFillBE({})
7004 for name in self.op.beparams.keys():
7005 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7006 del self.op.beparams[name]
7008 nic_defs = cluster.SimpleFillNIC({})
7009 for nic in self.op.nics:
7010 for name in constants.NICS_PARAMETERS:
7011 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7014 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7015 for name in self.op.osparams.keys():
7016 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7017 del self.op.osparams[name]
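# Sketch of the pruning rule applied four times above (hvparams, beparams,
# per-NIC parameters, osparams): drop every explicitly-given value that is
# identical to the cluster default, so only real overrides end up stored in
# the instance.  Hypothetical standalone form, with made-up parameter names:
#
#   def prune_to_overrides(given, defaults):
#     return dict((k, v) for k, v in given.items()
#                 if not (k in defaults and defaults[k] == v))
#
#   prune_to_overrides({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                      {"kernel_path": "/boot/vmlinuz", "acpi": False})
#   # -> {"acpi": True}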
7019 def CheckPrereq(self):
7020 """Check prerequisites.
7023 if self.op.mode == constants.INSTANCE_IMPORT:
7024 export_info = self._ReadExportInfo()
7025 self._ReadExportParams(export_info)
7027 _CheckDiskTemplate(self.op.disk_template)
7029 if (not self.cfg.GetVGName() and
7030 self.op.disk_template not in constants.DTS_NOT_LVM):
7031 raise errors.OpPrereqError("Cluster does not support lvm-based"
7032 " instances", errors.ECODE_STATE)
7034 if self.op.hypervisor is None:
7035 self.op.hypervisor = self.cfg.GetHypervisorType()
7037 cluster = self.cfg.GetClusterInfo()
7038 enabled_hvs = cluster.enabled_hypervisors
7039 if self.op.hypervisor not in enabled_hvs:
7040 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7041 " cluster (%s)" % (self.op.hypervisor,
7042 ",".join(enabled_hvs)),
7045 # check hypervisor parameter syntax (locally)
7046 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7047 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7049 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7050 hv_type.CheckParameterSyntax(filled_hvp)
7051 self.hv_full = filled_hvp
7052 # check that we don't specify global parameters on an instance
7053 _CheckGlobalHvParams(self.op.hvparams)
7055 # fill and remember the beparams dict
7056 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7057 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7059 # build os parameters
7060 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7062 # now that hvp/bep are in final format, let's reset to defaults,
7064 if self.op.identify_defaults:
7065 self._RevertToDefaults(cluster)
7069 for idx, nic in enumerate(self.op.nics):
7070 nic_mode_req = nic.get("mode", None)
7071 nic_mode = nic_mode_req
7072 if nic_mode is None:
7073 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7075 # in routed mode, for the first nic, the default ip is 'auto'
7076 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7077 default_ip_mode = constants.VALUE_AUTO
7079 default_ip_mode = constants.VALUE_NONE
7081 # ip validity checks
7082 ip = nic.get("ip", default_ip_mode)
7083 if ip is None or ip.lower() == constants.VALUE_NONE:
7085 elif ip.lower() == constants.VALUE_AUTO:
7086 if not self.op.name_check:
7087 raise errors.OpPrereqError("IP address set to auto but name checks"
7088 " have been skipped. Aborting.",
7090 nic_ip = self.hostname1.ip
7092 if not netutils.IsValidIP4(ip):
7093 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7094 " like a valid IP" % ip,
7098 # TODO: check the ip address for uniqueness
7099 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7100 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7103 # MAC address verification
7104 mac = nic.get("mac", constants.VALUE_AUTO)
7105 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106 mac = utils.NormalizeAndValidateMac(mac)
7109 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7110 except errors.ReservationError:
7111 raise errors.OpPrereqError("MAC address %s already in use"
7112 " in cluster" % mac,
7113 errors.ECODE_NOTUNIQUE)
7115 # bridge verification
7116 bridge = nic.get("bridge", None)
7117 link = nic.get("link", None)
7119 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7120 " at the same time", errors.ECODE_INVAL)
7121 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7122 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7129 nicparams[constants.NIC_MODE] = nic_mode_req
7131 nicparams[constants.NIC_LINK] = link
7133 check_params = cluster.SimpleFillNIC(nicparams)
7134 objects.NIC.CheckParameterSyntax(check_params)
7135 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7137 # disk checks/pre-build
7139 for disk in self.op.disks:
7140 mode = disk.get("mode", constants.DISK_RDWR)
7141 if mode not in constants.DISK_ACCESS_SET:
7142 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7143 mode, errors.ECODE_INVAL)
7144 size = disk.get("size", None)
7146 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7149 except (TypeError, ValueError):
7150 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7152 new_disk = {"size": size, "mode": mode}
7154 new_disk["adopt"] = disk["adopt"]
7155 self.disks.append(new_disk)
7157 if self.op.mode == constants.INSTANCE_IMPORT:
7159 # Check that the new instance doesn't have less disks than the export
7160 instance_disks = len(self.disks)
7161 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7162 if instance_disks < export_disks:
7163 raise errors.OpPrereqError("Not enough disks to import."
7164 " (instance: %d, export: %d)" %
7165 (instance_disks, export_disks),
7169 for idx in range(export_disks):
7170 option = 'disk%d_dump' % idx
7171 if export_info.has_option(constants.INISECT_INS, option):
7172 # FIXME: are the old os-es, disk sizes, etc. useful?
7173 export_name = export_info.get(constants.INISECT_INS, option)
7174 image = utils.PathJoin(self.op.src_path, export_name)
7175 disk_images.append(image)
7177 disk_images.append(False)
7179 self.src_images = disk_images
7181 old_name = export_info.get(constants.INISECT_INS, 'name')
7183 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7184 except (TypeError, ValueError), err:
7185 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7186 " an integer: %s" % str(err),
7188 if self.op.instance_name == old_name:
7189 for idx, nic in enumerate(self.nics):
7190 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7191 nic_mac_ini = 'nic%d_mac' % idx
7192 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7194 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7196 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7197 if self.op.ip_check:
7198 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7199 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7200 (self.check_ip, self.op.instance_name),
7201 errors.ECODE_NOTUNIQUE)
7203 #### mac address generation
7204 # By generating the mac address here, both the allocator and the hooks get
7205 # the real final mac address rather than the 'auto' or 'generate' value.
7206 # There is a race condition between the generation and the instance object
7207 # creation, which means that we know the mac is valid now, but we're not
7208 # sure it will be when we actually add the instance. If things go bad
7209 # adding the instance will abort because of a duplicate mac, and the
7210 # creation job will fail.
7211 for nic in self.nics:
7212 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7213 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7217 if self.op.iallocator is not None:
7218 self._RunAllocator()
7220 #### node related checks
7222 # check primary node
7223 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7224 assert self.pnode is not None, \
7225 "Cannot retrieve locked node %s" % self.op.pnode
7227 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7228 pnode.name, errors.ECODE_STATE)
7230 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7231 pnode.name, errors.ECODE_STATE)
7233 self.secondaries = []
7235 # mirror node verification
7236 if self.op.disk_template in constants.DTS_NET_MIRROR:
7237 if self.op.snode == pnode.name:
7238 raise errors.OpPrereqError("The secondary node cannot be the"
7239 " primary node.", errors.ECODE_INVAL)
7240 _CheckNodeOnline(self, self.op.snode)
7241 _CheckNodeNotDrained(self, self.op.snode)
7242 self.secondaries.append(self.op.snode)
7244 nodenames = [pnode.name] + self.secondaries
7246 req_size = _ComputeDiskSize(self.op.disk_template,
7249 # Check lv size requirements, if not adopting
7250 if req_size is not None and not self.adopt_disks:
7251 _CheckNodesFreeDisk(self, nodenames, req_size)
7253 if self.adopt_disks: # instead, we must check the adoption data
7254 all_lvs = set([i["adopt"] for i in self.disks])
7255 if len(all_lvs) != len(self.disks):
7256 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7258 for lv_name in all_lvs:
7260 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7261 except errors.ReservationError:
7262 raise errors.OpPrereqError("LV named %s used by another instance" %
7263 lv_name, errors.ECODE_NOTUNIQUE)
7265 node_lvs = self.rpc.call_lv_list([pnode.name],
7266 self.cfg.GetVGName())[pnode.name]
7267 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7268 node_lvs = node_lvs.payload
7269 delta = all_lvs.difference(node_lvs.keys())
7271 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7272 utils.CommaJoin(delta),
7274 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7276 raise errors.OpPrereqError("Online logical volumes found, cannot"
7277 " adopt: %s" % utils.CommaJoin(online_lvs),
7279 # update the size of disk based on what is found
7280 for dsk in self.disks:
7281 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
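# Sketch of the adoption checks performed just above, reduced to the rules
# they enforce; 'node_lvs' maps an LV name to (size, _, online) as returned
# by the LV listing, and all names here are illustrative only:
#
#   def check_adoptable(wanted_lvs, node_lvs):
#     missing = [lv for lv in wanted_lvs if lv not in node_lvs]
#     if missing:
#       raise ValueError("missing logical volume(s): %s" % missing)
#     online = [lv for lv in wanted_lvs if node_lvs[lv][2]]
#     if online:
#       raise ValueError("online logical volumes found: %s" % online)
#     # adopt the sizes actually reported by the node
#     return dict((lv, int(float(node_lvs[lv][0]))) for lv in wanted_lvs)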
7283 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7285 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7286 # check OS parameters (remotely)
7287 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7289 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7291 # memory check on primary node
7293 _CheckNodeFreeMemory(self, self.pnode.name,
7294 "creating instance %s" % self.op.instance_name,
7295 self.be_full[constants.BE_MEMORY],
7298 self.dry_run_result = list(nodenames)
7300 def Exec(self, feedback_fn):
7301 """Create and add the instance to the cluster.
7304 instance = self.op.instance_name
7305 pnode_name = self.pnode.name
7307 ht_kind = self.op.hypervisor
7308 if ht_kind in constants.HTS_REQ_PORT:
7309 network_port = self.cfg.AllocatePort()
7313 if constants.ENABLE_FILE_STORAGE:
7314 # this is needed because os.path.join does not accept None arguments
7315 if self.op.file_storage_dir is None:
7316 string_file_storage_dir = ""
7318 string_file_storage_dir = self.op.file_storage_dir
7320 # build the full file storage dir path
7321 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7322 string_file_storage_dir, instance)
7324 file_storage_dir = ""
7326 disks = _GenerateDiskTemplate(self,
7327 self.op.disk_template,
7328 instance, pnode_name,
7332 self.op.file_driver,
7335 iobj = objects.Instance(name=instance, os=self.op.os_type,
7336 primary_node=pnode_name,
7337 nics=self.nics, disks=disks,
7338 disk_template=self.op.disk_template,
7340 network_port=network_port,
7341 beparams=self.op.beparams,
7342 hvparams=self.op.hvparams,
7343 hypervisor=self.op.hypervisor,
7344 osparams=self.op.osparams,
7347 if self.adopt_disks:
7348 # rename LVs to the newly-generated names; we need to construct
7349 # 'fake' LV disks with the old data, plus the new unique_id
7350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7352 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7353 rename_to.append(t_dsk.logical_id)
7354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7355 self.cfg.SetDiskID(t_dsk, pnode_name)
7356 result = self.rpc.call_blockdev_rename(pnode_name,
7357 zip(tmp_disks, rename_to))
7358 result.Raise("Failed to rename adopted LVs")
7360 feedback_fn("* creating instance disks...")
7362 _CreateDisks(self, iobj)
7363 except errors.OpExecError:
7364 self.LogWarning("Device creation failed, reverting...")
7366 _RemoveDisks(self, iobj)
7368 self.cfg.ReleaseDRBDMinors(instance)
7371 feedback_fn("adding instance %s to cluster config" % instance)
7373 self.cfg.AddInstance(iobj, self.proc.GetECId())
7375 # Declare that we don't want to remove the instance lock anymore, as we've
7376 # added the instance to the config
7377 del self.remove_locks[locking.LEVEL_INSTANCE]
7378 # Unlock all the nodes
7379 if self.op.mode == constants.INSTANCE_IMPORT:
7380 nodes_keep = [self.op.src_node]
7381 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7382 if node != self.op.src_node]
7383 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7384 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7386 self.context.glm.release(locking.LEVEL_NODE)
7387 del self.acquired_locks[locking.LEVEL_NODE]
7389 if self.op.wait_for_sync:
7390 disk_abort = not _WaitForSync(self, iobj)
7391 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7392 # make sure the disks are not degraded (still sync-ing is ok)
7394 feedback_fn("* checking mirrors status")
7395 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7400 _RemoveDisks(self, iobj)
7401 self.cfg.RemoveInstance(iobj.name)
7402 # Make sure the instance lock gets removed
7403 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7404 raise errors.OpExecError("There are some degraded disks for"
7407 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7408 if self.op.mode == constants.INSTANCE_CREATE:
7409 if not self.op.no_install:
7410 feedback_fn("* running the instance OS create scripts...")
7411 # FIXME: pass debug option from opcode to backend
7412 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7413 self.op.debug_level)
7414 result.Raise("Could not add os for instance %s"
7415 " on node %s" % (instance, pnode_name))
7417 elif self.op.mode == constants.INSTANCE_IMPORT:
7418 feedback_fn("* running the instance OS import scripts...")
7422 for idx, image in enumerate(self.src_images):
7426 # FIXME: pass debug option from opcode to backend
7427 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7428 constants.IEIO_FILE, (image, ),
7429 constants.IEIO_SCRIPT,
7430 (iobj.disks[idx], idx),
7432 transfers.append(dt)
7435 masterd.instance.TransferInstanceData(self, feedback_fn,
7436 self.op.src_node, pnode_name,
7437 self.pnode.secondary_ip,
7439 if not compat.all(import_result):
7440 self.LogWarning("Some disks for instance %s on node %s were not"
7441 " imported successfully" % (instance, pnode_name))
7443 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7444 feedback_fn("* preparing remote import...")
7445 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7446 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7448 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7449 self.source_x509_ca,
7450 self._cds, timeouts)
7451 if not compat.all(disk_results):
7452 # TODO: Should the instance still be started, even if some disks
7453 # failed to import (valid for local imports, too)?
7454 self.LogWarning("Some disks for instance %s on node %s were not"
7455 " imported successfully" % (instance, pnode_name))
7457 # Run rename script on newly imported instance
7458 assert iobj.name == instance
7459 feedback_fn("Running rename script for %s" % instance)
7460 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7461 self.source_instance_name,
7462 self.op.debug_level)
7464 self.LogWarning("Failed to run rename script for %s on node"
7465 " %s: %s" % (instance, pnode_name, result.fail_msg))
7468 # also checked in the prereq part
7469 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7473 iobj.admin_up = True
7474 self.cfg.Update(iobj, feedback_fn)
7475 logging.info("Starting instance %s on node %s", instance, pnode_name)
7476 feedback_fn("* starting instance...")
7477 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7478 result.Raise("Could not start instance")
7480 return list(iobj.all_nodes)
7483 class LUConnectConsole(NoHooksLU):
7484 """Connect to an instance's console.
7486 This is somewhat special in that it returns the command line that
7487 you need to run on the master node in order to connect to the
7496 def ExpandNames(self):
7497 self._ExpandAndLockInstance()
7499 def CheckPrereq(self):
7500 """Check prerequisites.
7502 This checks that the instance is in the cluster.
7505 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7506 assert self.instance is not None, \
7507 "Cannot retrieve locked instance %s" % self.op.instance_name
7508 _CheckNodeOnline(self, self.instance.primary_node)
7510 def Exec(self, feedback_fn):
7511 """Connect to the console of an instance
7514 instance = self.instance
7515 node = instance.primary_node
7517 node_insts = self.rpc.call_instance_list([node],
7518 [instance.hypervisor])[node]
7519 node_insts.Raise("Can't get node information from %s" % node)
7521 if instance.name not in node_insts.payload:
7522 raise errors.OpExecError("Instance %s is not running." % instance.name)
7524 logging.debug("Connecting to console of %s on %s", instance.name, node)
7526 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7527 cluster = self.cfg.GetClusterInfo()
7528 # beparams and hvparams are passed separately, to avoid editing the
7529 # instance and then saving the defaults in the instance itself.
7530 hvparams = cluster.FillHV(instance)
7531 beparams = cluster.FillBE(instance)
7532 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7535 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
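# LUConnectConsole does not open the console itself: Exec() hands back the
# fully built command line (via self.ssh.BuildCmd above) and the client runs
# it.  Conceptually the result is an argv along these lines -- a sketch only,
# not the real BuildCmd output:
def _SketchConsoleSshArgv(node, console_cmd):
  """Build an illustrative 'ssh -t root@<node> <console command>' argv."""
  return ["ssh", "-t", "root@%s" % node, console_cmd]

# _SketchConsoleSshArgv("node1.example.com", "xm console instance1") ->
#   ['ssh', '-t', 'root@node1.example.com', 'xm console instance1']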
7538 class LUReplaceDisks(LogicalUnit):
7539 """Replace the disks of an instance.
7542 HPATH = "mirrors-replace"
7543 HTYPE = constants.HTYPE_INSTANCE
7546 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7547 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7548 ("remote_node", None, _TMaybeString),
7549 ("iallocator", None, _TMaybeString),
7550 ("early_release", False, _TBool),
7554 def CheckArguments(self):
7555 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7558 def ExpandNames(self):
7559 self._ExpandAndLockInstance()
7561 if self.op.iallocator is not None:
7562 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7564 elif self.op.remote_node is not None:
7565 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7566 self.op.remote_node = remote_node
7568 # Warning: do not remove the locking of the new secondary here
7569 # unless DRBD8.AddChildren is changed to work in parallel;
7570 # currently it doesn't since parallel invocations of
7571 # FindUnusedMinor will conflict
7572 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7573 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7576 self.needed_locks[locking.LEVEL_NODE] = []
7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7579 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7580 self.op.iallocator, self.op.remote_node,
7581 self.op.disks, False, self.op.early_release)
7583 self.tasklets = [self.replacer]
7585 def DeclareLocks(self, level):
7586 # If we're not already locking all nodes in the set we have to declare the
7587 # instance's primary/secondary nodes.
7588 if (level == locking.LEVEL_NODE and
7589 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7590 self._LockInstancesNodes()
7592 def BuildHooksEnv(self):
7595 This runs on the master, the primary and all the secondaries.
7598 instance = self.replacer.instance
7600 "MODE": self.op.mode,
7601 "NEW_SECONDARY": self.op.remote_node,
7602 "OLD_SECONDARY": instance.secondary_nodes[0],
7604 env.update(_BuildInstanceHookEnvByObject(self, instance))
7606 self.cfg.GetMasterNode(),
7607 instance.primary_node,
7609 if self.op.remote_node is not None:
7610 nl.append(self.op.remote_node)
7614 class TLReplaceDisks(Tasklet):
7615 """Replaces disks for an instance.
7617 Note: Locking is not within the scope of this class.
7620 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7621 disks, delay_iallocator, early_release):
7622 """Initializes this class.
7625 Tasklet.__init__(self, lu)
7628 self.instance_name = instance_name
7630 self.iallocator_name = iallocator_name
7631 self.remote_node = remote_node
7633 self.delay_iallocator = delay_iallocator
7634 self.early_release = early_release
7637 self.instance = None
7638 self.new_node = None
7639 self.target_node = None
7640 self.other_node = None
7641 self.remote_node_info = None
7642 self.node_secondary_ip = None
7645 def CheckArguments(mode, remote_node, iallocator):
7646 """Helper function for users of this class.
7649 # check for valid parameter combination
7650 if mode == constants.REPLACE_DISK_CHG:
7651 if remote_node is None and iallocator is None:
7652 raise errors.OpPrereqError("When changing the secondary either an"
7653 " iallocator script must be used or the"
7654 " new node given", errors.ECODE_INVAL)
7656 if remote_node is not None and iallocator is not None:
7657 raise errors.OpPrereqError("Give either the iallocator or the new"
7658 " secondary, not both", errors.ECODE_INVAL)
7660 elif remote_node is not None or iallocator is not None:
7661 # Not replacing the secondary
7662 raise errors.OpPrereqError("The iallocator and new node options can"
7663 " only be used when changing the"
7664 " secondary node", errors.ECODE_INVAL)
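# A brief, illustrative summary of what CheckArguments accepts (the node and
# iallocator script names below are hypothetical, not defined in this module):
#   mode=REPLACE_DISK_PRI/SEC/AUTO with remote_node=None, iallocator=None -> ok
#   mode=REPLACE_DISK_CHG with only remote_node="node3.example.com"       -> ok
#   mode=REPLACE_DISK_CHG with only iallocator="hail"                     -> ok
#   mode=REPLACE_DISK_CHG with both or neither of them                    -> rejected
#   any other mode combined with remote_node or iallocator                -> rejected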
7667 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7668 """Compute a new secondary node using an IAllocator.
7671 ial = IAllocator(lu.cfg, lu.rpc,
7672 mode=constants.IALLOCATOR_MODE_RELOC,
7674 relocate_from=relocate_from)
7676 ial.Run(iallocator_name)
7679 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7680 " %s" % (iallocator_name, ial.info),
7683 if len(ial.result) != ial.required_nodes:
7684 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7685 " of nodes (%s), required %s" %
7687 len(ial.result), ial.required_nodes),
7690 remote_node_name = ial.result[0]
7692 lu.LogInfo("Selected new secondary for instance '%s': %s",
7693 instance_name, remote_node_name)
7695 return remote_node_name
7697 def _FindFaultyDisks(self, node_name):
7698 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7701 def CheckPrereq(self):
7702 """Check prerequisites.
7704 This checks that the instance is in the cluster.
7707 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7708 assert instance is not None, \
7709 "Cannot retrieve locked instance %s" % self.instance_name
7711 if instance.disk_template != constants.DT_DRBD8:
7712 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7713 " instances", errors.ECODE_INVAL)
7715 if len(instance.secondary_nodes) != 1:
7716 raise errors.OpPrereqError("The instance has a strange layout,"
7717 " expected one secondary but found %d" %
7718 len(instance.secondary_nodes),
7721 if not self.delay_iallocator:
7722 self._CheckPrereq2()
7724 def _CheckPrereq2(self):
7725 """Check prerequisites, second part.
7727 This function should always be part of CheckPrereq. It was separated and is
7728 now called from Exec because during node evacuation iallocator was only
7729 called with an unmodified cluster model, not taking planned changes into account.
7733 instance = self.instance
7734 secondary_node = instance.secondary_nodes[0]
7736 if self.iallocator_name is None:
7737 remote_node = self.remote_node
7739 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7740 instance.name, instance.secondary_nodes)
7742 if remote_node is not None:
7743 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7744 assert self.remote_node_info is not None, \
7745 "Cannot retrieve locked node %s" % remote_node
7747 self.remote_node_info = None
7749 if remote_node == self.instance.primary_node:
7750 raise errors.OpPrereqError("The specified node is the primary node of"
7751 " the instance.", errors.ECODE_INVAL)
7753 if remote_node == secondary_node:
7754 raise errors.OpPrereqError("The specified node is already the"
7755 " secondary node of the instance.",
7758 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7759 constants.REPLACE_DISK_CHG):
7760 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7763 if self.mode == constants.REPLACE_DISK_AUTO:
7764 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7765 faulty_secondary = self._FindFaultyDisks(secondary_node)
7767 if faulty_primary and faulty_secondary:
7768 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7769 " one node and can not be repaired"
7770 " automatically" % self.instance_name,
7774 self.disks = faulty_primary
7775 self.target_node = instance.primary_node
7776 self.other_node = secondary_node
7777 check_nodes = [self.target_node, self.other_node]
7778 elif faulty_secondary:
7779 self.disks = faulty_secondary
7780 self.target_node = secondary_node
7781 self.other_node = instance.primary_node
7782 check_nodes = [self.target_node, self.other_node]
7788 # Non-automatic modes
7789 if self.mode == constants.REPLACE_DISK_PRI:
7790 self.target_node = instance.primary_node
7791 self.other_node = secondary_node
7792 check_nodes = [self.target_node, self.other_node]
7794 elif self.mode == constants.REPLACE_DISK_SEC:
7795 self.target_node = secondary_node
7796 self.other_node = instance.primary_node
7797 check_nodes = [self.target_node, self.other_node]
7799 elif self.mode == constants.REPLACE_DISK_CHG:
7800 self.new_node = remote_node
7801 self.other_node = instance.primary_node
7802 self.target_node = secondary_node
7803 check_nodes = [self.new_node, self.other_node]
7805 _CheckNodeNotDrained(self.lu, remote_node)
7807 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7808 assert old_node_info is not None
7809 if old_node_info.offline and not self.early_release:
7810 # doesn't make sense to delay the release
7811 self.early_release = True
7812 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7813 " early-release mode", secondary_node)
7816 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7819 # If not specified all disks should be replaced
7821 self.disks = range(len(self.instance.disks))
7823 for node in check_nodes:
7824 _CheckNodeOnline(self.lu, node)
7826 # Check whether disks are valid
7827 for disk_idx in self.disks:
7828 instance.FindDisk(disk_idx)
7830 # Get secondary node IP addresses
7833 for node_name in [self.target_node, self.other_node, self.new_node]:
7834 if node_name is not None:
7835 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7837 self.node_secondary_ip = node_2nd_ip
7839 def Exec(self, feedback_fn):
7840 """Execute disk replacement.
7842 This dispatches the disk replacement to the appropriate handler.
7845 if self.delay_iallocator:
7846 self._CheckPrereq2()
7849 feedback_fn("No disks need replacement")
7852 feedback_fn("Replacing disk(s) %s for %s" %
7853 (utils.CommaJoin(self.disks), self.instance.name))
7855 activate_disks = (not self.instance.admin_up)
7857 # Activate the instance disks if we're replacing them on a down instance
7859 _StartInstanceDisks(self.lu, self.instance, True)
7862 # Should we replace the secondary node?
7863 if self.new_node is not None:
7864 fn = self._ExecDrbd8Secondary
7866 fn = self._ExecDrbd8DiskOnly
7868 return fn(feedback_fn)
7871 # Deactivate the instance disks if we're replacing them on a
7874 _SafeShutdownInstanceDisks(self.lu, self.instance)
7876 def _CheckVolumeGroup(self, nodes):
7877 self.lu.LogInfo("Checking volume groups")
7879 vgname = self.cfg.GetVGName()
7881 # Make sure volume group exists on all involved nodes
7882 results = self.rpc.call_vg_list(nodes)
7884 raise errors.OpExecError("Can't list volume groups on the nodes")
7888 res.Raise("Error checking node %s" % node)
7889 if vgname not in res.payload:
7890 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7893 def _CheckDisksExistence(self, nodes):
7894 # Check disk existence
7895 for idx, dev in enumerate(self.instance.disks):
7896 if idx not in self.disks:
7900 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7901 self.cfg.SetDiskID(dev, node)
7903 result = self.rpc.call_blockdev_find(node, dev)
7905 msg = result.fail_msg
7906 if msg or not result.payload:
7908 msg = "disk not found"
7909 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7912 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7913 for idx, dev in enumerate(self.instance.disks):
7914 if idx not in self.disks:
7917 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7920 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7922 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7923 " replace disks for instance %s" %
7924 (node_name, self.instance.name))
7926 def _CreateNewStorage(self, node_name):
7927 vgname = self.cfg.GetVGName()
7930 for idx, dev in enumerate(self.instance.disks):
7931 if idx not in self.disks:
7934 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7936 self.cfg.SetDiskID(dev, node_name)
7938 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7939 names = _GenerateUniqueNames(self.lu, lv_names)
7941 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7942 logical_id=(vgname, names[0]))
7943 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7944 logical_id=(vgname, names[1]))
7946 new_lvs = [lv_data, lv_meta]
7947 old_lvs = dev.children
7948 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7950 # we pass force_create=True to force the LVM creation
7951 for new_lv in new_lvs:
7952 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7953 _GetInstanceInfoText(self.instance), False)
7957 def _CheckDevices(self, node_name, iv_names):
7958 for name, (dev, _, _) in iv_names.iteritems():
7959 self.cfg.SetDiskID(dev, node_name)
7961 result = self.rpc.call_blockdev_find(node_name, dev)
7963 msg = result.fail_msg
7964 if msg or not result.payload:
7966 msg = "disk not found"
7967 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7970 if result.payload.is_degraded:
7971 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7973 def _RemoveOldStorage(self, node_name, iv_names):
7974 for name, (_, old_lvs, _) in iv_names.iteritems():
7975 self.lu.LogInfo("Remove logical volumes for %s" % name)
7978 self.cfg.SetDiskID(lv, node_name)
7980 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7982 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7983 hint="remove unused LVs manually")
7985 def _ReleaseNodeLock(self, node_name):
7986 """Releases the lock for a given node."""
7987 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7989 def _ExecDrbd8DiskOnly(self, feedback_fn):
7990 """Replace a disk on the primary or secondary for DRBD 8.
7992 The algorithm for replace is quite complicated:
7994 1. for each disk to be replaced:
7996 1. create new LVs on the target node with unique names
7997 1. detach old LVs from the drbd device
7998 1. rename old LVs to name_replaced.<time_t>
7999 1. rename new LVs to old LVs
8000 1. attach the new LVs (with the old names now) to the drbd device
8002 1. wait for sync across all devices
8004 1. for each modified disk:
8006 1. remove old LVs (which have the name name_replaced.<time_t>)
8008 Failures are not very well handled.
8013 # Step: check device activation
8014 self.lu.LogStep(1, steps_total, "Check device existence")
8015 self._CheckDisksExistence([self.other_node, self.target_node])
8016 self._CheckVolumeGroup([self.target_node, self.other_node])
8018 # Step: check other node consistency
8019 self.lu.LogStep(2, steps_total, "Check peer consistency")
8020 self._CheckDisksConsistency(self.other_node,
8021 self.other_node == self.instance.primary_node,
8024 # Step: create new storage
8025 self.lu.LogStep(3, steps_total, "Allocate new storage")
8026 iv_names = self._CreateNewStorage(self.target_node)
8028 # Step: for each lv, detach+rename*2+attach
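# Sketch of the per-disk sequence implemented below (LV names are illustrative):
#   1. detach the old data/meta LVs from the DRBD device
#   2. rename the old LVs to "<name>_replaced-<time_t>"
#   3. rename the freshly created LVs to the old names
#   4. re-attach the renamed new LVs as children of the DRBD device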
8029 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8030 for dev, old_lvs, new_lvs in iv_names.itervalues():
8031 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8033 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8035 result.Raise("Can't detach drbd from local storage on node"
8036 " %s for device %s" % (self.target_node, dev.iv_name))
8038 #cfg.Update(instance)
8040 # ok, we created the new LVs, so now we know we have the needed
8041 # storage; as such, we proceed on the target node to rename
8042 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8043 # using the assumption that logical_id == physical_id (which in
8044 # turn is the unique_id on that node)
8046 # FIXME(iustin): use a better name for the replaced LVs
8047 temp_suffix = int(time.time())
8048 ren_fn = lambda d, suff: (d.physical_id[0],
8049 d.physical_id[1] + "_replaced-%s" % suff)
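# For a disk whose physical_id is, say, ("xenvg", "uuid.disk0_data") and a
# suffix of 1234567890, ren_fn yields ("xenvg", "uuid.disk0_data_replaced-1234567890");
# these names are purely illustrative.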
8051 # Build the rename list based on what LVs exist on the node
8052 rename_old_to_new = []
8053 for to_ren in old_lvs:
8054 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8055 if not result.fail_msg and result.payload:
8057 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8059 self.lu.LogInfo("Renaming the old LVs on the target node")
8060 result = self.rpc.call_blockdev_rename(self.target_node,
8062 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8064 # Now we rename the new LVs to the old LVs
8065 self.lu.LogInfo("Renaming the new LVs on the target node")
8066 rename_new_to_old = [(new, old.physical_id)
8067 for old, new in zip(old_lvs, new_lvs)]
8068 result = self.rpc.call_blockdev_rename(self.target_node,
8070 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8072 for old, new in zip(old_lvs, new_lvs):
8073 new.logical_id = old.logical_id
8074 self.cfg.SetDiskID(new, self.target_node)
8076 for disk in old_lvs:
8077 disk.logical_id = ren_fn(disk, temp_suffix)
8078 self.cfg.SetDiskID(disk, self.target_node)
8080 # Now that the new lvs have the old name, we can add them to the device
8081 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8082 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8084 msg = result.fail_msg
8086 for new_lv in new_lvs:
8087 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8090 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8091 hint=("cleanup manually the unused logical"
8093 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8095 dev.children = new_lvs
8097 self.cfg.Update(self.instance, feedback_fn)
8100 if self.early_release:
8101 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8103 self._RemoveOldStorage(self.target_node, iv_names)
8104 # WARNING: we release both node locks here, do not do other RPCs
8105 # than WaitForSync to the primary node
8106 self._ReleaseNodeLock([self.target_node, self.other_node])
8109 # This can fail as the old devices are degraded and _WaitForSync
8110 # does a combined result over all disks, so we don't check its return value
8111 self.lu.LogStep(cstep, steps_total, "Sync devices")
8113 _WaitForSync(self.lu, self.instance)
8115 # Check all devices manually
8116 self._CheckDevices(self.instance.primary_node, iv_names)
8118 # Step: remove old storage
8119 if not self.early_release:
8120 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8122 self._RemoveOldStorage(self.target_node, iv_names)
8124 def _ExecDrbd8Secondary(self, feedback_fn):
8125 """Replace the secondary node for DRBD 8.
8127 The algorithm for replace is quite complicated:
8128 - for all disks of the instance:
8129 - create new LVs on the new node with same names
8130 - shutdown the drbd device on the old secondary
8131 - disconnect the drbd network on the primary
8132 - create the drbd device on the new secondary
8133 - network attach the drbd on the primary, using an artifice:
8134 the drbd code for Attach() will connect to the network if it
8135 finds a device which is connected to the good local disks but
8137 - wait for sync across all devices
8138 - remove all disks from the old secondary
8140 Failures are not very well handled.
8145 # Step: check device activation
8146 self.lu.LogStep(1, steps_total, "Check device existence")
8147 self._CheckDisksExistence([self.instance.primary_node])
8148 self._CheckVolumeGroup([self.instance.primary_node])
8150 # Step: check other node consistency
8151 self.lu.LogStep(2, steps_total, "Check peer consistency")
8152 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8154 # Step: create new storage
8155 self.lu.LogStep(3, steps_total, "Allocate new storage")
8156 for idx, dev in enumerate(self.instance.disks):
8157 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8158 (self.new_node, idx))
8159 # we pass force_create=True to force LVM creation
8160 for new_lv in dev.children:
8161 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8162 _GetInstanceInfoText(self.instance), False)
8164 # Step 4: drbd minors and drbd setup changes
8165 # after this, we must manually remove the drbd minors on both the
8166 # error and the success paths
8167 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8168 minors = self.cfg.AllocateDRBDMinor([self.new_node
8169 for dev in self.instance.disks],
8171 logging.debug("Allocated minors %r", minors)
8174 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8175 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8176 (self.new_node, idx))
8177 # create new devices on new_node; note that we create two IDs:
8178 # one without port, so the drbd will be activated without
8179 # networking information on the new node at this stage, and one
8180 # with network, for the later activation in step 4
8181 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8182 if self.instance.primary_node == o_node1:
8185 assert self.instance.primary_node == o_node2, "Three-node instance?"
8188 new_alone_id = (self.instance.primary_node, self.new_node, None,
8189 p_minor, new_minor, o_secret)
8190 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8191 p_minor, new_minor, o_secret)
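# Shape of the logical_id tuples built above, with hypothetical values:
#   new_alone_id = ("pnode", "newnode", None,  0, 3, "secret")  # no port yet
#   new_net_id   = ("pnode", "newnode", 11000, 0, 3, "secret")  # with the DRBD port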
8193 iv_names[idx] = (dev, dev.children, new_net_id)
8194 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8196 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8197 logical_id=new_alone_id,
8198 children=dev.children,
8201 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8202 _GetInstanceInfoText(self.instance), False)
8203 except errors.GenericError:
8204 self.cfg.ReleaseDRBDMinors(self.instance.name)
8207 # We have new devices, shutdown the drbd on the old secondary
8208 for idx, dev in enumerate(self.instance.disks):
8209 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8210 self.cfg.SetDiskID(dev, self.target_node)
8211 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8213 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8214 " node: %s" % (idx, msg),
8215 hint=("Please cleanup this device manually as"
8216 " soon as possible"))
8218 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8219 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8220 self.node_secondary_ip,
8221 self.instance.disks)\
8222 [self.instance.primary_node]
8224 msg = result.fail_msg
8226 # detaches didn't succeed (unlikely)
8227 self.cfg.ReleaseDRBDMinors(self.instance.name)
8228 raise errors.OpExecError("Can't detach the disks from the network on"
8229 " old node: %s" % (msg,))
8231 # if we managed to detach at least one, we update all the disks of
8232 # the instance to point to the new secondary
8233 self.lu.LogInfo("Updating instance configuration")
8234 for dev, _, new_logical_id in iv_names.itervalues():
8235 dev.logical_id = new_logical_id
8236 self.cfg.SetDiskID(dev, self.instance.primary_node)
8238 self.cfg.Update(self.instance, feedback_fn)
8240 # and now perform the drbd attach
8241 self.lu.LogInfo("Attaching primary drbds to new secondary"
8242 " (standalone => connected)")
8243 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8245 self.node_secondary_ip,
8246 self.instance.disks,
8249 for to_node, to_result in result.items():
8250 msg = to_result.fail_msg
8252 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8254 hint=("please do a gnt-instance info to see the"
8255 " status of disks"))
8257 if self.early_release:
8258 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8260 self._RemoveOldStorage(self.target_node, iv_names)
8261 # WARNING: we release all node locks here, do not do other RPCs
8262 # than WaitForSync to the primary node
8263 self._ReleaseNodeLock([self.instance.primary_node,
8268 # This can fail as the old devices are degraded and _WaitForSync
8269 # does a combined result over all disks, so we don't check its return value
8270 self.lu.LogStep(cstep, steps_total, "Sync devices")
8272 _WaitForSync(self.lu, self.instance)
8274 # Check all devices manually
8275 self._CheckDevices(self.instance.primary_node, iv_names)
8277 # Step: remove old storage
8278 if not self.early_release:
8279 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8280 self._RemoveOldStorage(self.target_node, iv_names)
8283 class LURepairNodeStorage(NoHooksLU):
8284 """Repairs the volume group on a node.
8289 ("storage_type", _NoDefault, _CheckStorageType),
8290 ("name", _NoDefault, _TNonEmptyString),
8291 ("ignore_consistency", False, _TBool),
8295 def CheckArguments(self):
8296 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8298 storage_type = self.op.storage_type
8300 if (constants.SO_FIX_CONSISTENCY not in
8301 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8302 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8303 " repaired" % storage_type,
8306 def ExpandNames(self):
8307 self.needed_locks = {
8308 locking.LEVEL_NODE: [self.op.node_name],
8311 def _CheckFaultyDisks(self, instance, node_name):
8312 """Ensure faulty disks abort the opcode or at least warn."""
8314 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8316 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8317 " node '%s'" % (instance.name, node_name),
8319 except errors.OpPrereqError, err:
8320 if self.op.ignore_consistency:
8321 self.proc.LogWarning(str(err.args[0]))
8325 def CheckPrereq(self):
8326 """Check prerequisites.
8329 # Check whether any instance on this node has faulty disks
8330 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8331 if not inst.admin_up:
8333 check_nodes = set(inst.all_nodes)
8334 check_nodes.discard(self.op.node_name)
8335 for inst_node_name in check_nodes:
8336 self._CheckFaultyDisks(inst, inst_node_name)
8338 def Exec(self, feedback_fn):
8339 feedback_fn("Repairing storage unit '%s' on %s ..." %
8340 (self.op.name, self.op.node_name))
8342 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8343 result = self.rpc.call_storage_execute(self.op.node_name,
8344 self.op.storage_type, st_args,
8346 constants.SO_FIX_CONSISTENCY)
8347 result.Raise("Failed to repair storage unit '%s' on %s" %
8348 (self.op.name, self.op.node_name))
8351 class LUNodeEvacuationStrategy(NoHooksLU):
8352 """Computes the node evacuation strategy.
8356 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8357 ("remote_node", None, _TMaybeString),
8358 ("iallocator", None, _TMaybeString),
8362 def CheckArguments(self):
8363 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8365 def ExpandNames(self):
8366 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8367 self.needed_locks = locks = {}
8368 if self.op.remote_node is None:
8369 locks[locking.LEVEL_NODE] = locking.ALL_SET
8371 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8372 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8374 def Exec(self, feedback_fn):
8375 if self.op.remote_node is not None:
8377 for node in self.op.nodes:
8378 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8381 if i.primary_node == self.op.remote_node:
8382 raise errors.OpPrereqError("Node %s is the primary node of"
8383 " instance %s, cannot use it as"
8385 (self.op.remote_node, i.name),
8387 result.append([i.name, self.op.remote_node])
8389 ial = IAllocator(self.cfg, self.rpc,
8390 mode=constants.IALLOCATOR_MODE_MEVAC,
8391 evac_nodes=self.op.nodes)
8392 ial.Run(self.op.iallocator, validate=True)
8394 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8400 class LUGrowDisk(LogicalUnit):
8401 """Grow a disk of an instance.
8405 HTYPE = constants.HTYPE_INSTANCE
8408 ("disk", _NoDefault, _TInt),
8409 ("amount", _NoDefault, _TInt),
8410 ("wait_for_sync", True, _TBool),
8414 def ExpandNames(self):
8415 self._ExpandAndLockInstance()
8416 self.needed_locks[locking.LEVEL_NODE] = []
8417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8419 def DeclareLocks(self, level):
8420 if level == locking.LEVEL_NODE:
8421 self._LockInstancesNodes()
8423 def BuildHooksEnv(self):
8426 This runs on the master, the primary and all the secondaries.
8430 "DISK": self.op.disk,
8431 "AMOUNT": self.op.amount,
8433 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8437 def CheckPrereq(self):
8438 """Check prerequisites.
8440 This checks that the instance is in the cluster.
8443 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8444 assert instance is not None, \
8445 "Cannot retrieve locked instance %s" % self.op.instance_name
8446 nodenames = list(instance.all_nodes)
8447 for node in nodenames:
8448 _CheckNodeOnline(self, node)
8450 self.instance = instance
8452 if instance.disk_template not in constants.DTS_GROWABLE:
8453 raise errors.OpPrereqError("Instance's disk layout does not support"
8454 " growing.", errors.ECODE_INVAL)
8456 self.disk = instance.FindDisk(self.op.disk)
8458 if instance.disk_template != constants.DT_FILE:
8459 # TODO: check the free disk space for file, when that feature will be
8461 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8463 def Exec(self, feedback_fn):
8464 """Execute disk grow.
8467 instance = self.instance
8470 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8472 raise errors.OpExecError("Cannot activate block device to grow")
8474 for node in instance.all_nodes:
8475 self.cfg.SetDiskID(disk, node)
8476 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8477 result.Raise("Grow request failed on node %s" % node)
8479 # TODO: Rewrite code to work properly
8480 # DRBD goes into sync mode for a short amount of time after executing the
8481 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8482 # calling "resize" in sync mode fails. Sleeping for a short amount of
8483 # time is a work-around.
8486 disk.RecordGrow(self.op.amount)
8487 self.cfg.Update(instance, feedback_fn)
8488 if self.op.wait_for_sync:
8489 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8491 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8492 " status.\nPlease check the instance.")
8493 if not instance.admin_up:
8494 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8495 elif not instance.admin_up:
8496 self.proc.LogWarning("Not shutting down the disk even though the instance"
8497 " is not supposed to be running, because wait for"
8498 " sync was not requested.")
8501 class LUQueryInstanceData(NoHooksLU):
8502 """Query runtime instance data.
8506 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8507 ("static", False, _TBool),
8511 def ExpandNames(self):
8512 self.needed_locks = {}
8513 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8515 if self.op.instances:
8516 self.wanted_names = []
8517 for name in self.op.instances:
8518 full_name = _ExpandInstanceName(self.cfg, name)
8519 self.wanted_names.append(full_name)
8520 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8522 self.wanted_names = None
8523 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8525 self.needed_locks[locking.LEVEL_NODE] = []
8526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8528 def DeclareLocks(self, level):
8529 if level == locking.LEVEL_NODE:
8530 self._LockInstancesNodes()
8532 def CheckPrereq(self):
8533 """Check prerequisites.
8535 This only checks the optional instance list against the existing names.
8538 if self.wanted_names is None:
8539 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8541 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8542 in self.wanted_names]
8544 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8545 """Returns the status of a block device
8548 if self.op.static or not node:
8551 self.cfg.SetDiskID(dev, node)
8553 result = self.rpc.call_blockdev_find(node, dev)
8557 result.Raise("Can't compute disk status for %s" % instance_name)
8559 status = result.payload
8563 return (status.dev_path, status.major, status.minor,
8564 status.sync_percent, status.estimated_time,
8565 status.is_degraded, status.ldisk_status)
8567 def _ComputeDiskStatus(self, instance, snode, dev):
8568 """Compute block device status.
8571 if dev.dev_type in constants.LDS_DRBD:
8572 # we change the snode then (otherwise we use the one passed in)
8573 if dev.logical_id[0] == instance.primary_node:
8574 snode = dev.logical_id[1]
8576 snode = dev.logical_id[0]
8578 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8580 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8583 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8584 for child in dev.children]
8589 "iv_name": dev.iv_name,
8590 "dev_type": dev.dev_type,
8591 "logical_id": dev.logical_id,
8592 "physical_id": dev.physical_id,
8593 "pstatus": dev_pstatus,
8594 "sstatus": dev_sstatus,
8595 "children": dev_children,
8602 def Exec(self, feedback_fn):
8603 """Gather and return data"""
8606 cluster = self.cfg.GetClusterInfo()
8608 for instance in self.wanted_instances:
8609 if not self.op.static:
8610 remote_info = self.rpc.call_instance_info(instance.primary_node,
8612 instance.hypervisor)
8613 remote_info.Raise("Error checking node %s" % instance.primary_node)
8614 remote_info = remote_info.payload
8615 if remote_info and "state" in remote_info:
8618 remote_state = "down"
8621 if instance.admin_up:
8624 config_state = "down"
8626 disks = [self._ComputeDiskStatus(instance, None, device)
8627 for device in instance.disks]
8630 "name": instance.name,
8631 "config_state": config_state,
8632 "run_state": remote_state,
8633 "pnode": instance.primary_node,
8634 "snodes": instance.secondary_nodes,
8636 # this happens to be the same format used for hooks
8637 "nics": _NICListToTuple(self, instance.nics),
8638 "disk_template": instance.disk_template,
8640 "hypervisor": instance.hypervisor,
8641 "network_port": instance.network_port,
8642 "hv_instance": instance.hvparams,
8643 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8644 "be_instance": instance.beparams,
8645 "be_actual": cluster.FillBE(instance),
8646 "os_instance": instance.osparams,
8647 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8648 "serial_no": instance.serial_no,
8649 "mtime": instance.mtime,
8650 "ctime": instance.ctime,
8651 "uuid": instance.uuid,
8654 result[instance.name] = idict
8659 class LUSetInstanceParams(LogicalUnit):
8660 """Modifies an instance's parameters.
8663 HPATH = "instance-modify"
8664 HTYPE = constants.HTYPE_INSTANCE
8667 ("nics", _EmptyList, _TList),
8668 ("disks", _EmptyList, _TList),
8669 ("beparams", _EmptyDict, _TDict),
8670 ("hvparams", _EmptyDict, _TDict),
8671 ("disk_template", None, _TMaybeString),
8672 ("remote_node", None, _TMaybeString),
8673 ("os_name", None, _TMaybeString),
8674 ("force_variant", False, _TBool),
8675 ("osparams", None, _TOr(_TDict, _TNone)),
8680 def CheckArguments(self):
8681 if not (self.op.nics or self.op.disks or self.op.disk_template or
8682 self.op.hvparams or self.op.beparams or self.op.os_name):
8683 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8685 if self.op.hvparams:
8686 _CheckGlobalHvParams(self.op.hvparams)
8690 for disk_op, disk_dict in self.op.disks:
8691 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8692 if disk_op == constants.DDM_REMOVE:
8695 elif disk_op == constants.DDM_ADD:
8698 if not isinstance(disk_op, int):
8699 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8700 if not isinstance(disk_dict, dict):
8701 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8704 if disk_op == constants.DDM_ADD:
8705 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8706 if mode not in constants.DISK_ACCESS_SET:
8707 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8709 size = disk_dict.get('size', None)
8711 raise errors.OpPrereqError("Required disk parameter size missing",
8715 except (TypeError, ValueError), err:
8716 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8717 str(err), errors.ECODE_INVAL)
8718 disk_dict['size'] = size
8720 # modification of disk
8721 if 'size' in disk_dict:
8722 raise errors.OpPrereqError("Disk size change not possible, use"
8723 " grow-disk", errors.ECODE_INVAL)
8725 if disk_addremove > 1:
8726 raise errors.OpPrereqError("Only one disk add or remove operation"
8727 " supported at a time", errors.ECODE_INVAL)
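# Illustrative self.op.disks values (sizes in MiB, all values hypothetical):
#   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]  # add a 1 GiB disk
#   [(constants.DDM_REMOVE, {})]                                        # drop the last disk
#   [(0, {"mode": constants.DISK_RDONLY})]                              # change mode of disk 0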
8729 if self.op.disks and self.op.disk_template is not None:
8730 raise errors.OpPrereqError("Disk template conversion and other disk"
8731 " changes not supported at the same time",
8734 if self.op.disk_template:
8735 _CheckDiskTemplate(self.op.disk_template)
8736 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8737 self.op.remote_node is None):
8738 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8739 " one requires specifying a secondary node",
8744 for nic_op, nic_dict in self.op.nics:
8745 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8746 if nic_op == constants.DDM_REMOVE:
8749 elif nic_op == constants.DDM_ADD:
8752 if not isinstance(nic_op, int):
8753 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8754 if not isinstance(nic_dict, dict):
8755 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8756 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8758 # nic_dict should be a dict
8759 nic_ip = nic_dict.get('ip', None)
8760 if nic_ip is not None:
8761 if nic_ip.lower() == constants.VALUE_NONE:
8762 nic_dict['ip'] = None
8764 if not netutils.IsValidIP4(nic_ip):
8765 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8768 nic_bridge = nic_dict.get('bridge', None)
8769 nic_link = nic_dict.get('link', None)
8770 if nic_bridge and nic_link:
8771 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8772 " at the same time", errors.ECODE_INVAL)
8773 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8774 nic_dict['bridge'] = None
8775 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8776 nic_dict['link'] = None
8778 if nic_op == constants.DDM_ADD:
8779 nic_mac = nic_dict.get('mac', None)
8781 nic_dict['mac'] = constants.VALUE_AUTO
8783 if 'mac' in nic_dict:
8784 nic_mac = nic_dict['mac']
8785 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8786 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8788 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8789 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8790 " modifying an existing nic",
8793 if nic_addremove > 1:
8794 raise errors.OpPrereqError("Only one NIC add or remove operation"
8795 " supported at a time", errors.ECODE_INVAL)
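# Illustrative self.op.nics values (addresses and link names are examples only):
#   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "link": "br0"})]  # add a NIC
#   [(0, {"ip": "192.0.2.10"})]                                          # change the IP of NIC 0
#   [(constants.DDM_REMOVE, {})]                                         # remove the last NIC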
8797 def ExpandNames(self):
8798 self._ExpandAndLockInstance()
8799 self.needed_locks[locking.LEVEL_NODE] = []
8800 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8802 def DeclareLocks(self, level):
8803 if level == locking.LEVEL_NODE:
8804 self._LockInstancesNodes()
8805 if self.op.disk_template and self.op.remote_node:
8806 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8807 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8809 def BuildHooksEnv(self):
8812 This runs on the master, primary and secondaries.
8816 if constants.BE_MEMORY in self.be_new:
8817 args['memory'] = self.be_new[constants.BE_MEMORY]
8818 if constants.BE_VCPUS in self.be_new:
8819 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8820 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8821 # information at all.
8824 nic_override = dict(self.op.nics)
8825 for idx, nic in enumerate(self.instance.nics):
8826 if idx in nic_override:
8827 this_nic_override = nic_override[idx]
8829 this_nic_override = {}
8830 if 'ip' in this_nic_override:
8831 ip = this_nic_override['ip']
8834 if 'mac' in this_nic_override:
8835 mac = this_nic_override['mac']
8838 if idx in self.nic_pnew:
8839 nicparams = self.nic_pnew[idx]
8841 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8842 mode = nicparams[constants.NIC_MODE]
8843 link = nicparams[constants.NIC_LINK]
8844 args['nics'].append((ip, mac, mode, link))
8845 if constants.DDM_ADD in nic_override:
8846 ip = nic_override[constants.DDM_ADD].get('ip', None)
8847 mac = nic_override[constants.DDM_ADD]['mac']
8848 nicparams = self.nic_pnew[constants.DDM_ADD]
8849 mode = nicparams[constants.NIC_MODE]
8850 link = nicparams[constants.NIC_LINK]
8851 args['nics'].append((ip, mac, mode, link))
8852 elif constants.DDM_REMOVE in nic_override:
8853 del args['nics'][-1]
8855 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8856 if self.op.disk_template:
8857 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8861 def CheckPrereq(self):
8862 """Check prerequisites.
8864 This only checks the instance list against the existing names.
8867 # checking the new params on the primary/secondary nodes
8869 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8870 cluster = self.cluster = self.cfg.GetClusterInfo()
8871 assert self.instance is not None, \
8872 "Cannot retrieve locked instance %s" % self.op.instance_name
8873 pnode = instance.primary_node
8874 nodelist = list(instance.all_nodes)
8877 if self.op.os_name and not self.op.force:
8878 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8879 self.op.force_variant)
8880 instance_os = self.op.os_name
8882 instance_os = instance.os
8884 if self.op.disk_template:
8885 if instance.disk_template == self.op.disk_template:
8886 raise errors.OpPrereqError("Instance already has disk template %s" %
8887 instance.disk_template, errors.ECODE_INVAL)
8889 if (instance.disk_template,
8890 self.op.disk_template) not in self._DISK_CONVERSIONS:
8891 raise errors.OpPrereqError("Unsupported disk template conversion from"
8892 " %s to %s" % (instance.disk_template,
8893 self.op.disk_template),
8895 _CheckInstanceDown(self, instance, "cannot change disk template")
8896 if self.op.disk_template in constants.DTS_NET_MIRROR:
8897 if self.op.remote_node == pnode:
8898 raise errors.OpPrereqError("Given new secondary node %s is the same"
8899 " as the primary node of the instance" %
8900 self.op.remote_node, errors.ECODE_STATE)
8901 _CheckNodeOnline(self, self.op.remote_node)
8902 _CheckNodeNotDrained(self, self.op.remote_node)
8903 disks = [{"size": d.size} for d in instance.disks]
8904 required = _ComputeDiskSize(self.op.disk_template, disks)
8905 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8907 # hvparams processing
8908 if self.op.hvparams:
8909 hv_type = instance.hypervisor
8910 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8911 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8912 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8915 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8916 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8917 self.hv_new = hv_new # the new actual values
8918 self.hv_inst = i_hvdict # the new dict (without defaults)
8920 self.hv_new = self.hv_inst = {}
8922 # beparams processing
8923 if self.op.beparams:
8924 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8926 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8927 be_new = cluster.SimpleFillBE(i_bedict)
8928 self.be_new = be_new # the new actual values
8929 self.be_inst = i_bedict # the new dict (without defaults)
8931 self.be_new = self.be_inst = {}
8933 # osparams processing
8934 if self.op.osparams:
8935 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8936 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8937 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8938 self.os_inst = i_osdict # the new dict (without defaults)
8940 self.os_new = self.os_inst = {}
8944 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8945 mem_check_list = [pnode]
8946 if be_new[constants.BE_AUTO_BALANCE]:
8947 # either we changed auto_balance to yes or it was from before
8948 mem_check_list.extend(instance.secondary_nodes)
8949 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8950 instance.hypervisor)
8951 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8952 instance.hypervisor)
8953 pninfo = nodeinfo[pnode]
8954 msg = pninfo.fail_msg
8956 # Assume the primary node is unreachable and go ahead
8957 self.warn.append("Can't get info from primary node %s: %s" %
8959 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8960 self.warn.append("Node data from primary node %s doesn't contain"
8961 " free memory information" % pnode)
8962 elif instance_info.fail_msg:
8963 self.warn.append("Can't get instance runtime information: %s" %
8964 instance_info.fail_msg)
8966 if instance_info.payload:
8967 current_mem = int(instance_info.payload['memory'])
8969 # Assume instance not running
8970 # (there is a slight race condition here, but it's not very probable,
8971 # and we have no other way to check)
8973 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8974 pninfo.payload['memory_free'])
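# Worked example with made-up numbers: raising BE_MEMORY to 2048 MB while the
# instance currently uses 512 MB and the primary node reports 1024 MB free
# gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.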
8976 raise errors.OpPrereqError("This change will prevent the instance"
8977 " from starting, due to %d MB of memory"
8978 " missing on its primary node" % miss_mem,
8981 if be_new[constants.BE_AUTO_BALANCE]:
8982 for node, nres in nodeinfo.items():
8983 if node not in instance.secondary_nodes:
8987 self.warn.append("Can't get info from secondary node %s: %s" %
8989 elif not isinstance(nres.payload.get('memory_free', None), int):
8990 self.warn.append("Secondary node %s didn't return free"
8991 " memory information" % node)
8992 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8993 self.warn.append("Not enough memory to failover instance to"
8994 " secondary node %s" % node)
8999 for nic_op, nic_dict in self.op.nics:
9000 if nic_op == constants.DDM_REMOVE:
9001 if not instance.nics:
9002 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9005 if nic_op != constants.DDM_ADD:
9007 if not instance.nics:
9008 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9009 " no NICs" % nic_op,
9011 if nic_op < 0 or nic_op >= len(instance.nics):
9012 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9014 (nic_op, len(instance.nics) - 1),
9016 old_nic_params = instance.nics[nic_op].nicparams
9017 old_nic_ip = instance.nics[nic_op].ip
9022 update_params_dict = dict([(key, nic_dict[key])
9023 for key in constants.NICS_PARAMETERS
9024 if key in nic_dict])
9026 if 'bridge' in nic_dict:
9027 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9029 new_nic_params = _GetUpdatedParams(old_nic_params,
9031 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9032 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9033 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9034 self.nic_pinst[nic_op] = new_nic_params
9035 self.nic_pnew[nic_op] = new_filled_nic_params
9036 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9038 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9039 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9040 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9042 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9044 self.warn.append(msg)
9046 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9047 if new_nic_mode == constants.NIC_MODE_ROUTED:
9048 if 'ip' in nic_dict:
9049 nic_ip = nic_dict['ip']
9053 raise errors.OpPrereqError('Cannot set the nic ip to None'
9054 ' on a routed nic', errors.ECODE_INVAL)
9055 if 'mac' in nic_dict:
9056 nic_mac = nic_dict['mac']
9058 raise errors.OpPrereqError('Cannot set the nic mac to None',
9060 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9061 # otherwise generate the mac
9062 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9064 # or validate/reserve the current one
9066 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9067 except errors.ReservationError:
9068 raise errors.OpPrereqError("MAC address %s already in use"
9069 " in cluster" % nic_mac,
9070 errors.ECODE_NOTUNIQUE)
9073 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9074 raise errors.OpPrereqError("Disk operations not supported for"
9075 " diskless instances",
9077 for disk_op, _ in self.op.disks:
9078 if disk_op == constants.DDM_REMOVE:
9079 if len(instance.disks) == 1:
9080 raise errors.OpPrereqError("Cannot remove the last disk of"
9081 " an instance", errors.ECODE_INVAL)
9082 _CheckInstanceDown(self, instance, "cannot remove disks")
9084 if (disk_op == constants.DDM_ADD and
9085 len(instance.disks) >= constants.MAX_DISKS):
9086 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9087 " add more" % constants.MAX_DISKS,
9089 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9091 if disk_op < 0 or disk_op >= len(instance.disks):
9092 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9094 (disk_op, len(instance.disks)),
9099 def _ConvertPlainToDrbd(self, feedback_fn):
9100 """Converts an instance from plain to drbd.
9103 feedback_fn("Converting template to drbd")
9104 instance = self.instance
9105 pnode = instance.primary_node
9106 snode = self.op.remote_node
9108 # create a fake disk info for _GenerateDiskTemplate
9109 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9110 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9111 instance.name, pnode, [snode],
9112 disk_info, None, None, 0)
9113 info = _GetInstanceInfoText(instance)
9114 feedback_fn("Creating additional volumes...")
9115 # first, create the missing data and meta devices
9116 for disk in new_disks:
9117 # unfortunately this is... not too nice
9118 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9120 for child in disk.children:
9121 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9122 # at this stage, all new LVs have been created, we can rename the
9124 feedback_fn("Renaming original volumes...")
9125 rename_list = [(o, n.children[0].logical_id)
9126 for (o, n) in zip(instance.disks, new_disks)]
9127 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9128 result.Raise("Failed to rename original LVs")
9130 feedback_fn("Initializing DRBD devices...")
9131 # all child devices are in place, we can now create the DRBD devices
9132 for disk in new_disks:
9133 for node in [pnode, snode]:
9134 f_create = node == pnode
9135 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9137 # at this point, the instance has been modified
9138 instance.disk_template = constants.DT_DRBD8
9139 instance.disks = new_disks
9140 self.cfg.Update(instance, feedback_fn)
9142 # disks are created, waiting for sync
9143 disk_abort = not _WaitForSync(self, instance)
9145 raise errors.OpExecError("There are some degraded disks for"
9146 " this instance, please cleanup manually")
9148 def _ConvertDrbdToPlain(self, feedback_fn):
9149 """Converts an instance from drbd to plain.
9152 instance = self.instance
9153 assert len(instance.secondary_nodes) == 1
9154 pnode = instance.primary_node
9155 snode = instance.secondary_nodes[0]
9156 feedback_fn("Converting template to plain")
9158 old_disks = instance.disks
9159 new_disks = [d.children[0] for d in old_disks]
9161 # copy over size and mode
9162 for parent, child in zip(old_disks, new_disks):
9163 child.size = parent.size
9164 child.mode = parent.mode
9166 # update instance structure
9167 instance.disks = new_disks
9168 instance.disk_template = constants.DT_PLAIN
9169 self.cfg.Update(instance, feedback_fn)
9171 feedback_fn("Removing volumes on the secondary node...")
9172 for disk in old_disks:
9173 self.cfg.SetDiskID(disk, snode)
9174 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9176 self.LogWarning("Could not remove block device %s on node %s,"
9177 " continuing anyway: %s", disk.iv_name, snode, msg)
9179 feedback_fn("Removing unneeded volumes on the primary node...")
9180 for idx, disk in enumerate(old_disks):
9181 meta = disk.children[1]
9182 self.cfg.SetDiskID(meta, pnode)
9183 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9185 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9186 " continuing anyway: %s", idx, pnode, msg)
9189 def Exec(self, feedback_fn):
9190 """Modifies an instance.
9192 All parameters take effect only at the next restart of the instance.
9195 # Process here the warnings from CheckPrereq, as we don't have a
9196 # feedback_fn there.
9197 for warn in self.warn:
9198 feedback_fn("WARNING: %s" % warn)
9201 instance = self.instance
9203 for disk_op, disk_dict in self.op.disks:
9204 if disk_op == constants.DDM_REMOVE:
9205 # remove the last disk
9206 device = instance.disks.pop()
9207 device_idx = len(instance.disks)
9208 for node, disk in device.ComputeNodeTree(instance.primary_node):
9209 self.cfg.SetDiskID(disk, node)
9210 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9212 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9213 " continuing anyway", device_idx, node, msg)
9214 result.append(("disk/%d" % device_idx, "remove"))
9215 elif disk_op == constants.DDM_ADD:
9217 if instance.disk_template == constants.DT_FILE:
9218 file_driver, file_path = instance.disks[0].logical_id
9219 file_path = os.path.dirname(file_path)
9221 file_driver = file_path = None
9222 disk_idx_base = len(instance.disks)
9223 new_disk = _GenerateDiskTemplate(self,
9224 instance.disk_template,
9225 instance.name, instance.primary_node,
9226 instance.secondary_nodes,
9231 instance.disks.append(new_disk)
9232 info = _GetInstanceInfoText(instance)
9234 logging.info("Creating volume %s for instance %s",
9235 new_disk.iv_name, instance.name)
9236 # Note: this needs to be kept in sync with _CreateDisks
9238 for node in instance.all_nodes:
9239 f_create = node == instance.primary_node
9241 _CreateBlockDev(self, node, instance, new_disk,
9242 f_create, info, f_create)
9243 except errors.OpExecError, err:
9244 self.LogWarning("Failed to create volume %s (%s) on"
9246 new_disk.iv_name, new_disk, node, err)
9247 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9248 (new_disk.size, new_disk.mode)))
9250 # change a given disk
9251 instance.disks[disk_op].mode = disk_dict['mode']
9252 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9254 if self.op.disk_template:
9255 r_shut = _ShutdownInstanceDisks(self, instance)
9257 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9258 " proceed with disk template conversion")
9259 mode = (instance.disk_template, self.op.disk_template)
9261 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9263 self.cfg.ReleaseDRBDMinors(instance.name)
9265 result.append(("disk_template", self.op.disk_template))
9268 for nic_op, nic_dict in self.op.nics:
9269 if nic_op == constants.DDM_REMOVE:
9270 # remove the last nic
9271 del instance.nics[-1]
9272 result.append(("nic.%d" % len(instance.nics), "remove"))
9273 elif nic_op == constants.DDM_ADD:
9274 # mac and bridge should be set, by now
9275 mac = nic_dict['mac']
9276 ip = nic_dict.get('ip', None)
9277 nicparams = self.nic_pinst[constants.DDM_ADD]
9278 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9279 instance.nics.append(new_nic)
9280 result.append(("nic.%d" % (len(instance.nics) - 1),
9281 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9282 (new_nic.mac, new_nic.ip,
9283 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9287 for key in 'mac', 'ip':
9289 setattr(instance.nics[nic_op], key, nic_dict[key])
9290 if nic_op in self.nic_pinst:
9291 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9292 for key, val in nic_dict.iteritems():
9293 result.append(("nic.%s/%d" % (key, nic_op), val))
9296 if self.op.hvparams:
9297 instance.hvparams = self.hv_inst
9298 for key, val in self.op.hvparams.iteritems():
9299 result.append(("hv/%s" % key, val))
9302 if self.op.beparams:
9303 instance.beparams = self.be_inst
9304 for key, val in self.op.beparams.iteritems():
9305 result.append(("be/%s" % key, val))
9309 instance.os = self.op.os_name
9312 if self.op.osparams:
9313 instance.osparams = self.os_inst
9314 for key, val in self.op.osparams.iteritems():
9315 result.append(("os/%s" % key, val))
9317 self.cfg.Update(instance, feedback_fn)
9321 _DISK_CONVERSIONS = {
9322 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9323 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9327 class LUQueryExports(NoHooksLU):
9328 """Query the exports list
9332 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9333 ("use_locking", False, _TBool),
9337 def ExpandNames(self):
9338 self.needed_locks = {}
9339 self.share_locks[locking.LEVEL_NODE] = 1
9340 if not self.op.nodes:
9341 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9343 self.needed_locks[locking.LEVEL_NODE] = \
9344 _GetWantedNodes(self, self.op.nodes)
9346 def Exec(self, feedback_fn):
9347 """Compute the list of all the exported system images.
9350 @return: a dictionary with the structure node->(export-list)
9351 where export-list is a list of the instances exported on
9355 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9356 rpcresult = self.rpc.call_export_list(self.nodes)
9358 for node in rpcresult:
9359 if rpcresult[node].fail_msg:
9360 result[node] = False
9362 result[node] = rpcresult[node].payload
9367 class LUPrepareExport(NoHooksLU):
9368 """Prepares an instance for an export and returns useful information.
9373 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9377 def ExpandNames(self):
9378 self._ExpandAndLockInstance()
9380 def CheckPrereq(self):
9381 """Check prerequisites.
9384 instance_name = self.op.instance_name
9386 self.instance = self.cfg.GetInstanceInfo(instance_name)
9387 assert self.instance is not None, \
9388 "Cannot retrieve locked instance %s" % self.op.instance_name
9389 _CheckNodeOnline(self, self.instance.primary_node)
9391 self._cds = _GetClusterDomainSecret()
9393 def Exec(self, feedback_fn):
9394 """Prepares an instance for an export.
9397 instance = self.instance
9399 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9400 salt = utils.GenerateSecret(8)
9402 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9403 result = self.rpc.call_x509_cert_create(instance.primary_node,
9404 constants.RIE_CERT_VALIDITY)
9405 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9407 (name, cert_pem) = result.payload
9409 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9413 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9414 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9416 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9422 class LUExportInstance(LogicalUnit):
9423 """Export an instance to an image in the cluster.
9426 HPATH = "instance-export"
9427 HTYPE = constants.HTYPE_INSTANCE
9430 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9431 ("shutdown", True, _TBool),
9433 ("remove_instance", False, _TBool),
9434 ("ignore_remove_failures", False, _TBool),
9435 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9436 ("x509_key_name", None, _TOr(_TList, _TNone)),
9437 ("destination_x509_ca", None, _TMaybeString),
9441 def CheckArguments(self):
9442 """Check the arguments.
9445 self.x509_key_name = self.op.x509_key_name
9446 self.dest_x509_ca_pem = self.op.destination_x509_ca
9448 if self.op.remove_instance and not self.op.shutdown:
9449 raise errors.OpPrereqError("Can not remove instance without shutting it"
9452 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9453 if not self.x509_key_name:
9454 raise errors.OpPrereqError("Missing X509 key name for encryption",
9457 if not self.dest_x509_ca_pem:
9458 raise errors.OpPrereqError("Missing destination X509 CA",
9461 def ExpandNames(self):
9462 self._ExpandAndLockInstance()
9464 # Lock all nodes for local exports
9465 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9466 # FIXME: lock only instance primary and destination node
9468 # Sad but true, for now we have to lock all nodes, as we don't know where
9469 # the previous export might be, and in this LU we search for it and
9470 # remove it from its current node. In the future we could fix this by:
9471 # - making a tasklet to search (share-lock all), then create the
9472 # new one, then one to remove, after
9473 # - removing the removal operation altogether
9474 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9476 def DeclareLocks(self, level):
9477 """Last minute lock declaration."""
9478 # All nodes are locked anyway, so nothing to do here.
9480 def BuildHooksEnv(self):
9483 This will run on the master, primary node and target node.
9487 "EXPORT_MODE": self.op.mode,
9488 "EXPORT_NODE": self.op.target_node,
9489 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9490 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9491 # TODO: Generic function for boolean env variables
9492 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9495 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9497 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9499 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9500 nl.append(self.op.target_node)
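# Illustrative sketch (hypothetical values) of the variables assembled above
# for a local export; the instance-related entries added by
# _BuildInstanceHookEnvByObject are omitted:
#
#   EXPORT_MODE=local
#   EXPORT_NODE=node3.example.com
#   EXPORT_DO_SHUTDOWN=True
#   SHUTDOWN_TIMEOUT=120
#   REMOVE_INSTANCE=False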
9504 def CheckPrereq(self):
9505 """Check prerequisites.
9507 This checks that the instance and node names are valid.
9510 instance_name = self.op.instance_name
9512 self.instance = self.cfg.GetInstanceInfo(instance_name)
9513 assert self.instance is not None, \
9514 "Cannot retrieve locked instance %s" % self.op.instance_name
9515 _CheckNodeOnline(self, self.instance.primary_node)
9517 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9519 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9520 assert self.dst_node is not None
9522 _CheckNodeOnline(self, self.dst_node.name)
9523 _CheckNodeNotDrained(self, self.dst_node.name)
9526 self.dest_disk_info = None
9527 self.dest_x509_ca = None
9529 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9530 self.dst_node = None
9532 if len(self.op.target_node) != len(self.instance.disks):
9533 raise errors.OpPrereqError(("Received destination information for %s"
9534 " disks, but instance %s has %s disks") %
9535 (len(self.op.target_node), instance_name,
9536 len(self.instance.disks)),
9539 cds = _GetClusterDomainSecret()
9541 # Check X509 key name
9543 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9544 except (TypeError, ValueError), err:
9545 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
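# A minimal sketch of how the (key_name, hmac_digest, hmac_salt) triple
# unpacked above is expected to have been produced (hypothetical key name,
# mirroring what LUPrepareExport does with utils.Sha1Hmac):
#
#   hmac_salt = utils.GenerateSecret(8)
#   hmac_digest = utils.Sha1Hmac(cds, "x509-key-name", salt=hmac_salt)
#
# VerifySha1Hmac below recomputes the digest with the local cluster domain
# secret and rejects the export if it does not match.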
9547 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9548 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9551 # Load and verify CA
9553 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9554 except OpenSSL.crypto.Error, err:
9555 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9556 (err, ), errors.ECODE_INVAL)
9558 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9559 if errcode is not None:
9560 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9561 (msg, ), errors.ECODE_INVAL)
9563 self.dest_x509_ca = cert
9565 # Verify target information
9567 for idx, disk_data in enumerate(self.op.target_node):
9569 (host, port, magic) = \
9570 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9571 except errors.GenericError, err:
9572 raise errors.OpPrereqError("Target info for disk %s: %s" %
9573 (idx, err), errors.ECODE_INVAL)
9575 disk_info.append((host, port, magic))
9577 assert len(disk_info) == len(self.op.target_node)
9578 self.dest_disk_info = disk_info
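# Illustrative sketch (hypothetical values): after verification, each entry
# describes where the matching disk will be sent, e.g. for a two-disk
# instance
#
#   disk_info == [("192.0.2.10", 11000, "magic0"),
#                 ("192.0.2.10", 11001, "magic1")]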
9581 raise errors.ProgrammerError("Unhandled export mode %r" %
9584 # instance disk type verification
9585 # TODO: Implement export support for file-based disks
9586 for disk in self.instance.disks:
9587 if disk.dev_type == constants.LD_FILE:
9588 raise errors.OpPrereqError("Export not supported for instances with"
9589 " file-based disks", errors.ECODE_INVAL)
9591 def _CleanupExports(self, feedback_fn):
9592 """Removes exports of current instance from all other nodes.
9594 If an instance in a cluster with nodes A..D was exported to node C, its
9595 exports will be removed from the nodes A, B and D.
9598 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9600 nodelist = self.cfg.GetNodeList()
9601 nodelist.remove(self.dst_node.name)
9603 # on one-node clusters nodelist will be empty after the removal;
9604 # if we proceeded, the backup would be removed because OpQueryExports
9605 # substitutes an empty list with the full cluster node list.
9606 iname = self.instance.name
9608 feedback_fn("Removing old exports for instance %s" % iname)
9609 exportlist = self.rpc.call_export_list(nodelist)
9610 for node in exportlist:
9611 if exportlist[node].fail_msg:
9613 if iname in exportlist[node].payload:
9614 msg = self.rpc.call_export_remove(node, iname).fail_msg
9616 self.LogWarning("Could not remove older export for instance %s"
9617 " on node %s: %s", iname, node, msg)
9619 def Exec(self, feedback_fn):
9620 """Export an instance to an image in the cluster.
9623 assert self.op.mode in constants.EXPORT_MODES
9625 instance = self.instance
9626 src_node = instance.primary_node
9628 if self.op.shutdown:
9629 # shutdown the instance, but not the disks
9630 feedback_fn("Shutting down instance %s" % instance.name)
9631 result = self.rpc.call_instance_shutdown(src_node, instance,
9632 self.op.shutdown_timeout)
9633 # TODO: Maybe ignore failures if ignore_remove_failures is set
9634 result.Raise("Could not shutdown instance %s on"
9635 " node %s" % (instance.name, src_node))
9637 # set the disk IDs correctly since call_instance_start needs the
9638 # correct drbd minor to create the symlinks
9639 for disk in instance.disks:
9640 self.cfg.SetDiskID(disk, src_node)
9642 activate_disks = (not instance.admin_up)
9645 # Activate the instance disks if we're exporting a stopped instance
9646 feedback_fn("Activating disks for %s" % instance.name)
9647 _StartInstanceDisks(self, instance, None)
9650 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9653 helper.CreateSnapshots()
9655 if (self.op.shutdown and instance.admin_up and
9656 not self.op.remove_instance):
9657 assert not activate_disks
9658 feedback_fn("Starting instance %s" % instance.name)
9659 result = self.rpc.call_instance_start(src_node, instance, None, None)
9660 msg = result.fail_msg
9662 feedback_fn("Failed to start instance: %s" % msg)
9663 _ShutdownInstanceDisks(self, instance)
9664 raise errors.OpExecError("Could not start instance: %s" % msg)
9666 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9667 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9668 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9669 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9670 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9672 (key_name, _, _) = self.x509_key_name
9675 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9678 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9679 key_name, dest_ca_pem,
9684 # Check for backwards compatibility
9685 assert len(dresults) == len(instance.disks)
9686 assert compat.all(isinstance(i, bool) for i in dresults), \
9687 "Not all results are boolean: %r" % dresults
9691 feedback_fn("Deactivating disks for %s" % instance.name)
9692 _ShutdownInstanceDisks(self, instance)
9694 if not (compat.all(dresults) and fin_resu):
9697 failures.append("export finalization")
9698 if not compat.all(dresults):
9699 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9701 failures.append("disk export: disk(s) %s" % fdsk)
9703 raise errors.OpExecError("Export failed, errors in %s" %
9704 utils.CommaJoin(failures))
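# Illustrative sketch (hypothetical values) of the failure handling above: a
# two-disk export where only the second disk failed would have
#
#   fin_resu == True
#   dresults == [True, False]
#
# yielding the error "Export failed, errors in disk export: disk(s) 1".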
9706 # At this point, the export was successful, we can cleanup/finish
9708 # Remove instance if requested
9709 if self.op.remove_instance:
9710 feedback_fn("Removing instance %s" % instance.name)
9711 _RemoveInstance(self, feedback_fn, instance,
9712 self.op.ignore_remove_failures)
9714 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9715 self._CleanupExports(feedback_fn)
9717 return fin_resu, dresults
9720 class LURemoveExport(NoHooksLU):
9721 """Remove exports related to the named instance.
9729 def ExpandNames(self):
9730 self.needed_locks = {}
9731 # We need all nodes to be locked in order for RemoveExport to work, but we
9732 # don't need to lock the instance itself, as nothing will happen to it (and
9733 # we can also remove exports for an already removed instance)
9734 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9736 def Exec(self, feedback_fn):
9737 """Remove any export.
9740 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9741 # If the instance was not found we'll try with the name that was passed in.
9742 # This will only work if it was an FQDN, though.
9744 if not instance_name:
9746 instance_name = self.op.instance_name
9748 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9749 exportlist = self.rpc.call_export_list(locked_nodes)
9751 for node in exportlist:
9752 msg = exportlist[node].fail_msg
9754 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9756 if instance_name in exportlist[node].payload:
9758 result = self.rpc.call_export_remove(node, instance_name)
9759 msg = result.fail_msg
9761 logging.error("Could not remove export for instance %s"
9762 " on node %s: %s", instance_name, node, msg)
9764 if fqdn_warn and not found:
9765 feedback_fn("Export not found. If trying to remove an export belonging"
9766 " to a deleted instance please use its Fully Qualified"
9770 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9773 This is an abstract class which is the parent of all the other tags LUs.
9777 def ExpandNames(self):
9778 self.needed_locks = {}
9779 if self.op.kind == constants.TAG_NODE:
9780 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9781 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9782 elif self.op.kind == constants.TAG_INSTANCE:
9783 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9784 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9786 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9787 # not possible to acquire the BGL based on opcode parameters)
9789 def CheckPrereq(self):
9790 """Check prerequisites.
9793 if self.op.kind == constants.TAG_CLUSTER:
9794 self.target = self.cfg.GetClusterInfo()
9795 elif self.op.kind == constants.TAG_NODE:
9796 self.target = self.cfg.GetNodeInfo(self.op.name)
9797 elif self.op.kind == constants.TAG_INSTANCE:
9798 self.target = self.cfg.GetInstanceInfo(self.op.name)
9800 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9801 str(self.op.kind), errors.ECODE_INVAL)
9804 class LUGetTags(TagsLU):
9805 """Returns the tags of a given object.
9809 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9810 # Name is only meaningful for nodes and instances
9811 ("name", _NoDefault, _TMaybeString),
9815 def ExpandNames(self):
9816 TagsLU.ExpandNames(self)
9818 # Share locks as this is only a read operation
9819 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9821 def Exec(self, feedback_fn):
9822 """Returns the tag list.
9825 return list(self.target.GetTags())
9828 class LUSearchTags(NoHooksLU):
9829 """Searches the tags for a given pattern.
9833 ("pattern", _NoDefault, _TNonEmptyString),
9837 def ExpandNames(self):
9838 self.needed_locks = {}
9840 def CheckPrereq(self):
9841 """Check prerequisites.
9843 This checks the pattern passed for validity by compiling it.
9847 self.re = re.compile(self.op.pattern)
9848 except re.error, err:
9849 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9850 (self.op.pattern, err), errors.ECODE_INVAL)
9852 def Exec(self, feedback_fn):
9853 """Returns the tag list.
9857 tgts = [("/cluster", cfg.GetClusterInfo())]
9858 ilist = cfg.GetAllInstancesInfo().values()
9859 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9860 nlist = cfg.GetAllNodesInfo().values()
9861 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9863 for path, target in tgts:
9864 for tag in target.GetTags():
9865 if self.re.search(tag):
9866 results.append((path, tag))
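# Illustrative sketch (hypothetical tags): a search for the pattern "^db"
# could return something like
#
#   [("/instances/inst1.example.com", "db:primary"),
#    ("/nodes/node2.example.com", "dbhost")]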
9870 class LUAddTags(TagsLU):
9871 """Sets a tag on a given object.
9875 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9876 # Name is only meaningful for nodes and instances
9877 ("name", _NoDefault, _TMaybeString),
9878 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9882 def CheckPrereq(self):
9883 """Check prerequisites.
9885 This checks the type and length of the tag name and value.
9888 TagsLU.CheckPrereq(self)
9889 for tag in self.op.tags:
9890 objects.TaggableObject.ValidateTag(tag)
9892 def Exec(self, feedback_fn):
9897 for tag in self.op.tags:
9898 self.target.AddTag(tag)
9899 except errors.TagError, err:
9900 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9901 self.cfg.Update(self.target, feedback_fn)
9904 class LUDelTags(TagsLU):
9905 """Delete a list of tags from a given object.
9909 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9910 # Name is only meaningful for nodes and instances
9911 ("name", _NoDefault, _TMaybeString),
9912 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9916 def CheckPrereq(self):
9917 """Check prerequisites.
9919 This checks that we have the given tag.
9922 TagsLU.CheckPrereq(self)
9923 for tag in self.op.tags:
9924 objects.TaggableObject.ValidateTag(tag)
9925 del_tags = frozenset(self.op.tags)
9926 cur_tags = self.target.GetTags()
9928 diff_tags = del_tags - cur_tags
9930 diff_names = ("'%s'" % i for i in sorted(diff_tags))
9931 raise errors.OpPrereqError("Tag(s) %s not found" %
9932 (utils.CommaJoin(diff_names), ),
9935 def Exec(self, feedback_fn):
9936 """Remove the tag from the object.
9939 for tag in self.op.tags:
9940 self.target.RemoveTag(tag)
9941 self.cfg.Update(self.target, feedback_fn)
9944 class LUTestDelay(NoHooksLU):
9945 """Sleep for a specified amount of time.
9947 This LU sleeps on the master and/or nodes for a specified amount of
9952 ("duration", _NoDefault, _TFloat),
9953 ("on_master", True, _TBool),
9954 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9955 ("repeat", 0, _TPositiveInt)
9959 def ExpandNames(self):
9960 """Expand names and set required locks.
9962 This expands the node list, if any.
9965 self.needed_locks = {}
9966 if self.op.on_nodes:
9967 # _GetWantedNodes can be used here, but is not always appropriate to use
9968 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9970 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9971 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9973 def _TestDelay(self):
9974 """Do the actual sleep.
9977 if self.op.on_master:
9978 if not utils.TestDelay(self.op.duration):
9979 raise errors.OpExecError("Error during master delay test")
9980 if self.op.on_nodes:
9981 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9982 for node, node_result in result.items():
9983 node_result.Raise("Failure during rpc call to node %s" % node)
9985 def Exec(self, feedback_fn):
9986 """Execute the test delay opcode, with the wanted repetitions.
9989 if self.op.repeat == 0:
9992 top_value = self.op.repeat - 1
9993 for i in range(self.op.repeat):
9994 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9998 class LUTestJobqueue(NoHooksLU):
9999 """Utility LU to test some aspects of the job queue.
10003 ("notify_waitlock", False, _TBool),
10004 ("notify_exec", False, _TBool),
10005 ("log_messages", _EmptyList, _TListOf(_TString)),
10006 ("fail", False, _TBool),
10010 # Must be lower than default timeout for WaitForJobChange to see whether it
10011 # notices changed jobs
10012 _CLIENT_CONNECT_TIMEOUT = 20.0
10013 _CLIENT_CONFIRM_TIMEOUT = 60.0
10016 def _NotifyUsingSocket(cls, cb, errcls):
10017 """Opens a Unix socket and waits for another program to connect.
10020 @param cb: Callback to send socket name to client
10021 @type errcls: class
10022 @param errcls: Exception class to use for errors
10025 # Using a temporary directory as there's no easy way to create temporary
10026 # sockets without writing a custom loop around tempfile.mktemp and
10028 tmpdir = tempfile.mkdtemp()
10030 tmpsock = utils.PathJoin(tmpdir, "sock")
10032 logging.debug("Creating temporary socket at %s", tmpsock)
10033 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10038 # Send details to client
10041 # Wait for client to connect before continuing
10042 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10044 (conn, _) = sock.accept()
10045 except socket.error, err:
10046 raise errcls("Client didn't connect in time (%s)" % err)
10050 # Remove as soon as client is connected
10051 shutil.rmtree(tmpdir)
10053 # Wait for client to close
10056 # pylint: disable-msg=E1101
10057 # Instance of '_socketobject' has no ... member
10058 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10060 except socket.error, err:
10061 raise errcls("Client failed to confirm notification (%s)" % err)
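# A minimal client-side sketch (hypothetical, test helper only): the peer
# that receives the socket path through the callback acknowledges roughly as
#
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)
#   client.close()          # lets the confirmation wait above complete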
10065 def _SendNotification(self, test, arg, sockname):
10066 """Sends a notification to the client.
10069 @param test: Test name
10070 @param arg: Test argument (depends on test)
10071 @type sockname: string
10072 @param sockname: Socket path
10075 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10077 def _Notify(self, prereq, test, arg):
10078 """Notifies the client of a test.
10081 @param prereq: Whether this is a prereq-phase test
10083 @param test: Test name
10084 @param arg: Test argument (depends on test)
10088 errcls = errors.OpPrereqError
10090 errcls = errors.OpExecError
10092 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10096 def CheckArguments(self):
10097 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10098 self.expandnames_calls = 0
10100 def ExpandNames(self):
10101 checkargs_calls = getattr(self, "checkargs_calls", 0)
10102 if checkargs_calls < 1:
10103 raise errors.ProgrammerError("CheckArguments was not called")
10105 self.expandnames_calls += 1
10107 if self.op.notify_waitlock:
10108 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10110 self.LogInfo("Expanding names")
10112 # Get lock on master node (just to get a lock, not for a particular reason)
10113 self.needed_locks = {
10114 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10117 def Exec(self, feedback_fn):
10118 if self.expandnames_calls < 1:
10119 raise errors.ProgrammerError("ExpandNames was not called")
10121 if self.op.notify_exec:
10122 self._Notify(False, constants.JQT_EXEC, None)
10124 self.LogInfo("Executing")
10126 if self.op.log_messages:
10127 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10128 for idx, msg in enumerate(self.op.log_messages):
10129 self.LogInfo("Sending log message %s", idx + 1)
10130 feedback_fn(constants.JQT_MSGPREFIX + msg)
10131 # Report how many test messages have been sent
10132 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10135 raise errors.OpExecError("Opcode failure was requested")
10140 class IAllocator(object):
10141 """IAllocator framework.
10143 An IAllocator instance has four sets of attributes:
10144 - cfg that is needed to query the cluster
10145 - input data (all members of the _KEYS class attribute are required)
10146 - four buffer attributes (in|out_data|text), which represent the
10147 input (to the external script) in text and data structure format,
10148 and the output from it, again in two formats
10149 - the result variables from the script (success, info, nodes) for
10153 # pylint: disable-msg=R0902
10154 # lots of instance attributes
10156 "name", "mem_size", "disks", "disk_template",
10157 "os", "tags", "nics", "vcpus", "hypervisor",
10160 "name", "relocate_from",
10166 def __init__(self, cfg, rpc, mode, **kwargs):
10169 # init buffer variables
10170 self.in_text = self.out_text = self.in_data = self.out_data = None
10171 # init all input fields so that pylint is happy
10173 self.mem_size = self.disks = self.disk_template = None
10174 self.os = self.tags = self.nics = self.vcpus = None
10175 self.hypervisor = None
10176 self.relocate_from = None
10178 self.evac_nodes = None
10180 self.required_nodes = None
10181 # init result fields
10182 self.success = self.info = self.result = None
10183 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10184 keyset = self._ALLO_KEYS
10185 fn = self._AddNewInstance
10186 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10187 keyset = self._RELO_KEYS
10188 fn = self._AddRelocateInstance
10189 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10190 keyset = self._EVAC_KEYS
10191 fn = self._AddEvacuateNodes
10193 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10194 " IAllocator" % self.mode)
10196 if key not in keyset:
10197 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10198 " IAllocator" % key)
10199 setattr(self, key, kwargs[key])
10202 if key not in kwargs:
10203 raise errors.ProgrammerError("Missing input parameter '%s' to"
10204 " IAllocator" % key)
10205 self._BuildInputData(fn)
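# A minimal sketch (hypothetical values) of how an allocation request is
# typically constructed; LUTestAllocator below builds one the same way:
#
#   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", mem_size=1024,
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8, os="debian-image",
#                    tags=[], nics=[{"mac": None, "ip": None,
#                                    "bridge": None}],
#                    vcpus=1, hypervisor=constants.HT_XEN_PVM)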
10207 def _ComputeClusterData(self):
10208 """Compute the generic allocator input data.
10210 This is the data that is independent of the actual operation.
10214 cluster_info = cfg.GetClusterInfo()
10217 "version": constants.IALLOCATOR_VERSION,
10218 "cluster_name": cfg.GetClusterName(),
10219 "cluster_tags": list(cluster_info.GetTags()),
10220 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10221 # we don't have job IDs
10223 iinfo = cfg.GetAllInstancesInfo().values()
10224 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10228 node_list = cfg.GetNodeList()
10230 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10231 hypervisor_name = self.hypervisor
10232 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10233 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10234 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10235 hypervisor_name = cluster_info.enabled_hypervisors[0]
10237 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10240 self.rpc.call_all_instances_info(node_list,
10241 cluster_info.enabled_hypervisors)
10242 for nname, nresult in node_data.items():
10243 # first fill in static (config-based) values
10244 ninfo = cfg.GetNodeInfo(nname)
10246 "tags": list(ninfo.GetTags()),
10247 "primary_ip": ninfo.primary_ip,
10248 "secondary_ip": ninfo.secondary_ip,
10249 "offline": ninfo.offline,
10250 "drained": ninfo.drained,
10251 "master_candidate": ninfo.master_candidate,
10254 if not (ninfo.offline or ninfo.drained):
10255 nresult.Raise("Can't get data for node %s" % nname)
10256 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10258 remote_info = nresult.payload
10260 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10261 'vg_size', 'vg_free', 'cpu_total']:
10262 if attr not in remote_info:
10263 raise errors.OpExecError("Node '%s' didn't return attribute"
10264 " '%s'" % (nname, attr))
10265 if not isinstance(remote_info[attr], int):
10266 raise errors.OpExecError("Node '%s' returned invalid value"
10268 (nname, attr, remote_info[attr]))
10269 # compute memory used by primary instances
10270 i_p_mem = i_p_up_mem = 0
10271 for iinfo, beinfo in i_list:
10272 if iinfo.primary_node == nname:
10273 i_p_mem += beinfo[constants.BE_MEMORY]
10274 if iinfo.name not in node_iinfo[nname].payload:
10277 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10278 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10279 remote_info['memory_free'] -= max(0, i_mem_diff)
10282 i_p_up_mem += beinfo[constants.BE_MEMORY]
10284 # compute memory used by instances
10286 "total_memory": remote_info['memory_total'],
10287 "reserved_memory": remote_info['memory_dom0'],
10288 "free_memory": remote_info['memory_free'],
10289 "total_disk": remote_info['vg_size'],
10290 "free_disk": remote_info['vg_free'],
10291 "total_cpus": remote_info['cpu_total'],
10292 "i_pri_memory": i_p_mem,
10293 "i_pri_up_memory": i_p_up_mem,
10295 pnr.update(pnr_dyn)
10297 node_results[nname] = pnr
10298 data["nodes"] = node_results
10302 for iinfo, beinfo in i_list:
10304 for nic in iinfo.nics:
10305 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10306 nic_dict = {"mac": nic.mac,
10308 "mode": filled_params[constants.NIC_MODE],
10309 "link": filled_params[constants.NIC_LINK],
10311 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10312 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10313 nic_data.append(nic_dict)
10315 "tags": list(iinfo.GetTags()),
10316 "admin_up": iinfo.admin_up,
10317 "vcpus": beinfo[constants.BE_VCPUS],
10318 "memory": beinfo[constants.BE_MEMORY],
10320 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10322 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10323 "disk_template": iinfo.disk_template,
10324 "hypervisor": iinfo.hypervisor,
10326 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10328 instance_data[iinfo.name] = pir
10330 data["instances"] = instance_data
10332 self.in_data = data
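# Illustrative sketch (hypothetical values) of the structure assembled above;
# only a few representative keys are shown:
#
#   self.in_data == {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#     "instances": {"inst1.example.com": {"vcpus": 1, ...}},
#   }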
10334 def _AddNewInstance(self):
10335 """Add new instance data to allocator structure.
10337 This in combination with _ComputeClusterData will create the
10338 correct structure needed as input for the allocator.
10340 The checks for the completeness of the opcode must have already been
10344 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10346 if self.disk_template in constants.DTS_NET_MIRROR:
10347 self.required_nodes = 2
10349 self.required_nodes = 1
10352 "disk_template": self.disk_template,
10355 "vcpus": self.vcpus,
10356 "memory": self.mem_size,
10357 "disks": self.disks,
10358 "disk_space_total": disk_space,
10360 "required_nodes": self.required_nodes,
10364 def _AddRelocateInstance(self):
10365 """Add relocate instance data to allocator structure.
10367 This in combination with _ComputeClusterData will create the
10368 correct structure needed as input for the allocator.
10370 The checks for the completeness of the opcode must have already been
10374 instance = self.cfg.GetInstanceInfo(self.name)
10375 if instance is None:
10376 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10377 " IAllocator" % self.name)
10379 if instance.disk_template not in constants.DTS_NET_MIRROR:
10380 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10381 errors.ECODE_INVAL)
10383 if len(instance.secondary_nodes) != 1:
10384 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
10385 errors.ECODE_STATE)
10387 self.required_nodes = 1
10388 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10389 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10393 "disk_space_total": disk_space,
10394 "required_nodes": self.required_nodes,
10395 "relocate_from": self.relocate_from,
10399 def _AddEvacuateNodes(self):
10400 """Add evacuate nodes data to allocator structure.
10404 "evac_nodes": self.evac_nodes
10408 def _BuildInputData(self, fn):
10409 """Build input data structures.
10412 self._ComputeClusterData()
10415 request["type"] = self.mode
10416 self.in_data["request"] = request
10418 self.in_text = serializer.Dump(self.in_data)
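# Illustrative sketch (hypothetical node name): for a node evacuation the
# serialized text contains a request section along the lines of
#
#   "request": {"type": "multi-evacuate",        # IALLOCATOR_MODE_MEVAC
#               "evac_nodes": ["node2.example.com"]}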
10420 def Run(self, name, validate=True, call_fn=None):
10421 """Run an instance allocator and return the results.
10424 if call_fn is None:
10425 call_fn = self.rpc.call_iallocator_runner
10427 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10428 result.Raise("Failure while running the iallocator script")
10430 self.out_text = result.payload
10432 self._ValidateResult()
10434 def _ValidateResult(self):
10435 """Process the allocator results.
10437 This will process and if successful save the result in
10438 self.out_data and the other parameters.
10442 rdict = serializer.Load(self.out_text)
10443 except Exception, err:
10444 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10446 if not isinstance(rdict, dict):
10447 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10449 # TODO: remove backwards compatibility in later versions
10450 if "nodes" in rdict and "result" not in rdict:
10451 rdict["result"] = rdict["nodes"]
10454 for key in "success", "info", "result":
10455 if key not in rdict:
10456 raise errors.OpExecError("Can't parse iallocator results:"
10457 " missing key '%s'" % key)
10458 setattr(self, key, rdict[key])
10460 if not isinstance(rdict["result"], list):
10461 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10463 self.out_data = rdict
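# Illustrative sketch (hypothetical names) of a well-formed reply that passes
# the checks above for an allocation request:
#
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}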
10466 class LUTestAllocator(NoHooksLU):
10467 """Run allocator tests.
10469 This LU runs the allocator tests
10473 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10474 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10475 ("name", _NoDefault, _TNonEmptyString),
10476 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10477 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10478 _TOr(_TNone, _TNonEmptyString))))),
10479 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10480 ("hypervisor", None, _TMaybeString),
10481 ("allocator", None, _TMaybeString),
10482 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10483 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10484 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10485 ("os", None, _TMaybeString),
10486 ("disk_template", None, _TMaybeString),
10487 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10490 def CheckPrereq(self):
10491 """Check prerequisites.
10493 This checks the opcode parameters depending on the direction and mode of the test.
10496 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10497 for attr in ["mem_size", "disks", "disk_template",
10498 "os", "tags", "nics", "vcpus"]:
10499 if not hasattr(self.op, attr):
10500 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10501 attr, errors.ECODE_INVAL)
10502 iname = self.cfg.ExpandInstanceName(self.op.name)
10503 if iname is not None:
10504 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10505 iname, errors.ECODE_EXISTS)
10506 if not isinstance(self.op.nics, list):
10507 raise errors.OpPrereqError("Invalid parameter 'nics'",
10508 errors.ECODE_INVAL)
10509 if not isinstance(self.op.disks, list):
10510 raise errors.OpPrereqError("Invalid parameter 'disks'",
10511 errors.ECODE_INVAL)
10512 for row in self.op.disks:
10513 if (not isinstance(row, dict) or
10514 "size" not in row or
10515 not isinstance(row["size"], int) or
10516 "mode" not in row or
10517 row["mode"] not in ['r', 'w']):
10518 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10519 " parameter", errors.ECODE_INVAL)
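# Illustrative sketch of a 'disks' value accepted by the check above
# (hypothetical sizes, in MiB):
#
#   [{"size": 1024, "mode": "w"}, {"size": 512, "mode": "r"}]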
10520 if self.op.hypervisor is None:
10521 self.op.hypervisor = self.cfg.GetHypervisorType()
10522 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10523 fname = _ExpandInstanceName(self.cfg, self.op.name)
10524 self.op.name = fname
10525 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10526 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10527 if not hasattr(self.op, "evac_nodes"):
10528 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10529 " opcode input", errors.ECODE_INVAL)
10531 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10532 self.op.mode, errors.ECODE_INVAL)
10534 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10535 if self.op.allocator is None:
10536 raise errors.OpPrereqError("Missing allocator name",
10537 errors.ECODE_INVAL)
10538 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10539 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10540 self.op.direction, errors.ECODE_INVAL)
10542 def Exec(self, feedback_fn):
10543 """Run the allocator test.
10546 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10547 ial = IAllocator(self.cfg, self.rpc,
10550 mem_size=self.op.mem_size,
10551 disks=self.op.disks,
10552 disk_template=self.op.disk_template,
10556 vcpus=self.op.vcpus,
10557 hypervisor=self.op.hypervisor,
10559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10560 ial = IAllocator(self.cfg, self.rpc,
10563 relocate_from=list(self.relocate_from),
10565 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10566 ial = IAllocator(self.cfg, self.rpc,
10568 evac_nodes=self.op.evac_nodes)
10570 raise errors.ProgrammerError("Unhandled mode %s in"
10571 " LUTestAllocator.Exec", self.op.mode)
10573 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10574 result = ial.in_text
10576 ial.Run(self.op.allocator, validate=False)
10577 result = ial.out_text