4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have way too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
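# Illustrative examples of composing the checks above (shown as comments,
# with hypothetical values, so nothing runs at import time):
#   _TListOf(_TNonEmptyString)(["node1", "node2"])          -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])                -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"count": 3})  -> True
#   _TMaybeString(None)                                      -> True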
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
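# Illustrative sketch (the extra parameter below is hypothetical): an LU
# declares its parameters as (name, default, type-check) tuples in its
# _OP_PARAMS class attribute, combining the common definitions above with
# LU-specific ones, e.g.:
#   _OP_PARAMS = [
#     _PInstanceName,
#     ("ignore_failures", False, _TBool),
#   ]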
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, the default values
275 they should get if not already defined, and the types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
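# Illustrative failure modes of the parameter check above (the opcode id
# "OP_EXAMPLE" is hypothetical): a missing required parameter raises
#   OpPrereqError("Required parameter 'OP_EXAMPLE.instance_name' missing")
# while a parameter failing its type check raises
#   OpPrereqError("Parameter 'OP_EXAMPLE.instance_name' fails validation")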
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods can no longer worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same time.
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are allowed.
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not have 'GANETI_' prefixed as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
496 Note that if the HPATH for a LU class is None, this function will not be called.
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the 'unused argument' and 'could
522 # be a function' pylint warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we really have been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
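# Illustrative usage sketch (mirrors the docstring above): an LU that locked
# instances in ExpandNames and set
#   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# would then typically declare its node locks as:
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()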
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be deleted' values
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
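# Illustrative example (hypothetical values), using the defaults
# use_default=True and use_none=False:
#   _GetUpdatedParams({"memory": 128, "vcpus": 1},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 2})
# returns {"vcpus": 2}: "memory" is removed so it reverts to its default,
# while "vcpus" is simply overwritten.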
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of global parameters.
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node does not support the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
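# Illustrative usage (the override value is hypothetical): callers can
# replace individual arguments fed to _BuildInstanceHookEnv, e.g.
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"status": False})
# builds the environment from the instance's configuration but reports the
# instance as not running.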
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max with one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
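# Illustrative example (hypothetical numbers): with candidate_pool_size=10,
# 3 current master candidates and 3 candidates that "should" exist, adding
# this node bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4 the node
# decides to promote itself to master candidate.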
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
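# Illustrative output (hypothetical node and message): with op.error_codes
# enabled a problem is reported in the machine-parseable form
#   - ERROR:ENODEVERSION:node:node2.example.com:incompatible protocol versions
# while without it the simpler form
#   - ERROR: node node2.example.com: incompatible protocol versions
# is used.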
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # do not mark the operation as failed for WARN cases only
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node time.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node time.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as
1715 # secondary has enough memory to host all instances it is
1716 # supposed to should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies and the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance list.
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file-based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026 Cluster-Verify hooks are run only in the post phase; any hook failure is
2027 logged in the verify output and makes the verification fail.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time window.
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary, so that situation is not well supported either.
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in their results
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313 # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
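# Illustrative sketch (hypothetical output): indent_re matches the start of
# every line (re.M), so a multi-line hook output such as "ok\nmissing bridge"
# is re-emitted through feedback_fn with each line indented under the
# per-script error message.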
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
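# Illustrative sketch (hypothetical names) of the transformation above:
#   inst_lvs == {"node1.example.com": ["xenvg/lv-data", "xenvg/lv-meta"]}
# for an instance `inst` becomes
#   nv_dict == {("node1.example.com", "xenvg/lv-data"): inst,
#               ("node1.example.com", "xenvg/lv-meta"): inst}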
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
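# Illustrative sketch (hypothetical data) of the result returned by this LU:
#   ({"node3.example.com": "rpc failure"},   # per-node errors
#    ["instance1.example.com"],              # instances needing activate-disks
#    {"instance2.example.com": [("node1.example.com", "xenvg/lv-data")]})
#                                            # instances with missing volumes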
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
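# Illustrative sketch (hypothetical sizes): for a DRBD8 disk of size 10240
# whose data child was recorded with size 10048, the method above logs the
# mismatch, bumps the child size to 10240 and returns True, so the caller
# knows the instance configuration has to be written out again.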
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsizes(node, newl)
2489 self.LogWarning("Failure in blockdev_getsizes call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_oss", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_oss", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2641 def CheckArguments(self):
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks that the given parameters do not conflict and that
2677 the given volume group is valid.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693 # if vg_name is not None, check the given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't enabled before
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2911 def helper_oss(aname, mods, desc):
2912 lst = getattr(self.cluster, aname)
2913 for key, val in mods:
2914 if key == constants.DDM_ADD:
2916 feedback_fn("OS %s already in %s, ignoring", val, desc)
2919 elif key == constants.DDM_REMOVE:
2923 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2925 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2927 if self.op.hidden_oss:
2928 helper_oss("hidden_oss", self.op.hidden_oss,
2931 if self.op.blacklisted_oss:
2932 helper_oss("blacklisted_oss", self.op.blacklisted_oss,
2933 "blacklisted OS list")
2935 self.cfg.Update(self.cluster, feedback_fn)
2938 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2939 """Distribute additional files which are part of the cluster configuration.
2941 ConfigWriter takes care of distributing the config and ssconf files, but
2942 there are more files which should be distributed to all nodes. This function
2943 makes sure those are copied.
2945 @param lu: calling logical unit
2946 @param additional_nodes: list of nodes not in the config to distribute to
2949 # 1. Gather target nodes
2950 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2951 dist_nodes = lu.cfg.GetOnlineNodeList()
2952 if additional_nodes is not None:
2953 dist_nodes.extend(additional_nodes)
2954 if myself.name in dist_nodes:
2955 dist_nodes.remove(myself.name)
2957 # 2. Gather files to distribute
2958 dist_files = set([constants.ETC_HOSTS,
2959 constants.SSH_KNOWN_HOSTS_FILE,
2960 constants.RAPI_CERT_FILE,
2961 constants.RAPI_USERS_FILE,
2962 constants.CONFD_HMAC_KEY,
2963 constants.CLUSTER_DOMAIN_SECRET_FILE,
2966 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2967 for hv_name in enabled_hypervisors:
2968 hv_class = hypervisor.GetHypervisor(hv_name)
2969 dist_files.update(hv_class.GetAncillaryFiles())
2971 # 3. Perform the files upload
2972 for fname in dist_files:
2973 if os.path.exists(fname):
2974 result = lu.rpc.call_upload_file(dist_nodes, fname)
2975 for to_node, to_result in result.items():
2976 msg = to_result.fail_msg
2978 msg = ("Copy of file %s to node %s failed: %s" %
2979 (fname, to_node, msg))
2980 lu.proc.LogWarning(msg)
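# Illustrative sketch: typical invocations from an LU, as used elsewhere in
# this module, are
#   _RedistributeAncillaryFiles(self)
# or, when a brand-new node is not yet part of the configuration,
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])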
2983 class LURedistributeConfig(NoHooksLU):
2984 """Force the redistribution of cluster configuration.
2986 This is a very simple LU.
2991 def ExpandNames(self):
2992 self.needed_locks = {
2993 locking.LEVEL_NODE: locking.ALL_SET,
2995 self.share_locks[locking.LEVEL_NODE] = 1
2997 def Exec(self, feedback_fn):
2998 """Redistribute the configuration.
3001 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3002 _RedistributeAncillaryFiles(self)
3005 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3006 """Sleep and poll for an instance's disk to sync.
3009 if not instance.disks or disks is not None and not disks:
3012 disks = _ExpandCheckDisks(instance, disks)
3015 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3017 node = instance.primary_node
3020 lu.cfg.SetDiskID(dev, node)
3022 # TODO: Convert to utils.Retry
3025 degr_retries = 10 # in seconds, as we sleep 1 second each time
3029 cumul_degraded = False
3030 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3031 msg = rstats.fail_msg
3033 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3036 raise errors.RemoteError("Can't contact node %s for mirror data,"
3037 " aborting." % node)
3040 rstats = rstats.payload
3042 for i, mstat in enumerate(rstats):
3044 lu.LogWarning("Can't compute data for node %s/%s",
3045 node, disks[i].iv_name)
3048 cumul_degraded = (cumul_degraded or
3049 (mstat.is_degraded and mstat.sync_percent is None))
3050 if mstat.sync_percent is not None:
3052 if mstat.estimated_time is not None:
3053 rem_time = ("%s remaining (estimated)" %
3054 utils.FormatSeconds(mstat.estimated_time))
3055 max_time = mstat.estimated_time
3057 rem_time = "no time estimate"
3058 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3059 (disks[i].iv_name, mstat.sync_percent, rem_time))
3061 # if we're done but degraded, let's do a few small retries, to
3062 # make sure we see a stable and not transient situation; therefore
3063 # we force restart of the loop
3064 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3065 logging.info("Degraded disks found, %d retries left", degr_retries)
3073 time.sleep(min(60, max_time))
3076 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3077 return not cumul_degraded
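# Illustrative sketch: callers are assumed to use the return value as, e.g.,
#   disk_abort = not _WaitForSync(lu, instance)
# i.e. True means all mirrors finished syncing without staying degraded.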
3080 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3081 """Check that mirrors are not degraded.
3083 The ldisk parameter, if True, will change the test from the
3084 is_degraded attribute (which represents overall non-ok status for
3085 the device(s)) to the ldisk (representing the local storage status).
3088 lu.cfg.SetDiskID(dev, node)
3092 if on_primary or dev.AssembleOnSecondary():
3093 rstats = lu.rpc.call_blockdev_find(node, dev)
3094 msg = rstats.fail_msg
3096 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3098 elif not rstats.payload:
3099 lu.LogWarning("Can't find disk on node %s", node)
3103 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3105 result = result and not rstats.payload.is_degraded
3108 for child in dev.children:
3109 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3114 class LUDiagnoseOS(NoHooksLU):
3115 """Logical unit for OS diagnose/query.
3120 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3124 _BLK = "blacklisted"
3126 _FIELDS_STATIC = utils.FieldSet()
3127 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3128 "parameters", "api_versions", _HID, _BLK)
3130 def CheckArguments(self):
3132 raise errors.OpPrereqError("Selective OS query not supported",
3135 _CheckOutputFields(static=self._FIELDS_STATIC,
3136 dynamic=self._FIELDS_DYNAMIC,
3137 selected=self.op.output_fields)
3139 def ExpandNames(self):
3140 # Lock all nodes, in shared mode
3141 # Temporary removal of locks, should be reverted later
3142 # TODO: reintroduce locks when they are lighter-weight
3143 self.needed_locks = {}
3144 #self.share_locks[locking.LEVEL_NODE] = 1
3145 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3148 def _DiagnoseByOS(rlist):
3149 """Remaps a per-node return list into an a per-os per-node dictionary
3151 @param rlist: a map with node names as keys and OS objects as values
3154 @return: a dictionary with OS names as keys and, as values, another
3155 map with nodes as keys and lists of (path, status, diagnose,
3156 variants, parameters, api_versions) tuples as values, e.g.::
3158 {"debian-etch": {"node1": [("/usr/lib/...", True, "", [], [], []),
3159 ("/srv/...", False, "invalid api", [], [], [])],
3160 "node2": [("/srv/...", True, "", [], [], [])]}}
3165 # we build here the list of nodes that didn't fail the RPC (at RPC
3166 # level), so that nodes with a non-responding node daemon don't
3167 # make all OSes invalid
3168 good_nodes = [node_name for node_name in rlist
3169 if not rlist[node_name].fail_msg]
3170 for node_name, nr in rlist.items():
3171 if nr.fail_msg or not nr.payload:
3173 for (name, path, status, diagnose, variants,
3174 params, api_versions) in nr.payload:
3175 if name not in all_os:
3176 # build a list of nodes for this os containing empty lists
3177 # for each node in node_list
3179 for nname in good_nodes:
3180 all_os[name][nname] = []
3181 # convert params from [name, help] to (name, help)
3182 params = [tuple(v) for v in params]
3183 all_os[name][node_name].append((path, status, diagnose,
3184 variants, params, api_versions))
3187 def Exec(self, feedback_fn):
3188 """Compute the list of OSes.
3191 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3192 node_data = self.rpc.call_os_diagnose(valid_nodes)
3193 pol = self._DiagnoseByOS(node_data)
3195 cluster = self.cfg.GetClusterInfo()
3197 for os_name, os_data in pol.items():
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3204 (variants, params, api_versions) = null_state
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0: # first entry
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else: # keep consistency
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3216 is_hid = os_name in cluster.hidden_oss
3217 is_blk = os_name in cluster.blacklisted_oss
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3223 for field in self.op.output_fields:
3226 elif field == self._VLD:
3228 elif field == "node_status":
3229 # this is just a copy of the dict
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = list(variants)
3235 elif field == "parameters":
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3241 elif field == self._BLK:
3244 raise errors.ParameterError(field)
3251 class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node.
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3261 def BuildHooksEnv(self):
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3272 all_nodes = self.cfg.GetNodeList()
3274 all_nodes.remove(self.op.node_name)
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the all nodes list", self.op.node_name)
3278 return env, all_nodes, all_nodes
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3295 instance_list = self.cfg.GetInstanceList()
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3309 self.op.node_name = node.name
3312 def Exec(self, feedback_fn):
3313 """Removes the node from the cluster.
3317 logging.info("Stopping the node daemon and removing configs from node %s",
3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322 # Promote nodes to master candidate as needed
3323 _AdjustCandidatePool(self, exceptions=[node.name])
3324 self.context.RemoveNode(node.name)
3326 # Run post hooks on the node before it's removed
3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331 # pylint: disable-msg=W0702
3332 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3335 msg = result.fail_msg
3337 self.LogWarning("Errors encountered on the remote node while leaving"
3338 " the cluster: %s", msg)
3340 # Remove node from our /etc/hosts
3341 if self.cfg.GetClusterInfo().modify_etc_hosts:
3342 # FIXME: this should be done via an rpc call to node daemon
3343 utils.RemoveHostFromEtcHosts(node.name)
3344 _RedistributeAncillaryFiles(self)
3347 class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes.
3351 # pylint: disable-msg=W0142
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3364 "mtotal", "mnode", "mfree",
3366 "ctotal", "cnodes", "csockets",
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3374 "role"] + _SIMPLE_FIELDS
3377 def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC,
3379 dynamic=self._FIELDS_DYNAMIC,
3380 selected=self.op.output_fields)
3382 def ExpandNames(self):
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3389 self.wanted = locking.ALL_SET
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3394 # if we don't request only static fields, we need to lock the nodes
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397 def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3401 all_info = self.cfg.GetAllNodesInfo()
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3411 nodenames = all_info.keys()
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3416 # begin data gathering
3418 if self.do_node_query:
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3439 live_data[name] = {}
3441 live_data = dict.fromkeys(nodenames, {})
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3458 master_node = self.cfg.GetMasterNode()
3460 # end data gathering
3463 for node in nodelist:
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3489 elif node.master_candidate:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3505 class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s).
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3517 def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC,
3519 dynamic=self._FIELDS_DYNAMIC,
3520 selected=self.op.output_fields)
3522 def ExpandNames(self):
3523 self.needed_locks = {}
3524 self.share_locks[locking.LEVEL_NODE] = 1
3525 if not self.op.nodes:
3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528 self.needed_locks[locking.LEVEL_NODE] = \
3529 _GetWantedNodes(self, self.op.nodes)
3531 def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
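# Illustrative sketch (hypothetical names): lv_by_node maps each instance
# object to its per-node LV layout, e.g.
#   lv_by_node[inst] == {"node1.example.com": ["xenvg/lv-data"]}
# which is used below to attribute every volume a node reports to its owner.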
3544 for node in nodenames:
3545 nresult = volumes[node]
3548 msg = nresult.fail_msg
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3556 for vol in node_vols:
3558 for field in self.op.output_fields:
3561 elif field == "phys":
3565 elif field == "name":
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3571 if node not in lv_by_node[inst]:
3573 if vol['name'] in lv_by_node[inst][node]:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3582 output.append(node_output)
3587 class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s).
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3600 def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC,
3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3603 selected=self.op.output_fields)
3605 def ExpandNames(self):
3606 self.needed_locks = {}
3607 self.share_locks[locking.LEVEL_NODE] = 1
3610 self.needed_locks[locking.LEVEL_NODE] = \
3611 _GetWantedNodes(self, self.op.nodes)
3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615 def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621 # Always get name to sort by
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3625 fields = [constants.SF_NAME] + self.op.output_fields
3627 # Never ask for node or type as it's only known to the LU
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3647 msg = nresult.fail_msg
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3654 for name in utils.NiceSort(rows.keys()):
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3667 raise errors.ParameterError(field)
3676 class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node.
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3688 def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691 storage_type = self.op.storage_type
3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3697 " modified" % storage_type,
3700 diff = set(self.op.changes.keys()) - modifiable
3702 raise errors.OpPrereqError("The following fields can not be modified for"
3703 " storage units of type '%s': %r" %
3704 (storage_type, list(diff)),
3707 def ExpandNames(self):
3708 self.needed_locks = {
3709 locking.LEVEL_NODE: self.op.node_name,
3712 def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3724 class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster.
3729 HTYPE = constants.HTYPE_NODE
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3737 def CheckArguments(self):
3738 # validate/normalize the node name
3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741 def BuildHooksEnv(self):
3744 This will run on all nodes before, and on all nodes + the new node after.
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3757 def CheckPrereq(self):
3758 """Check prerequisites.
3761 - the new node is not already in the config
3763 - its parameters (single/dual homed) match the cluster
3765 Any errors are signaled by raising errors.OpPrereqError.
3768 node_name = self.op.node_name
3771 dns_data = netutils.GetHostInfo(node_name)
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3780 secondary_ip = self.op.secondary_ip
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3790 self.changed_primary_ip = False
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3813 # check that the type of the node (single versus dual homed) is the
3814 # same as for the master
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3828 # checks reachability
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3833 if not newbie_singlehomed:
3834 # check reachability from my secondary ip to newbie's secondary ip
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3858 def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3862 new_node = self.new_node
3863 node = new_node.name
3865 # for re-adds, reset the offline/drained/master-candidate flags;
3866 # we need to reset here, otherwise offline would prevent RPC calls
3867 # later in the procedure; this also means that if the re-add
3868 # fails, we are left with a non-offlined, broken node
3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872 # if we demote the node, we do cleanup later in the procedure
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3877 # notify the user about any possible mc promotion
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3881 # check connectivity
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3902 keyarray.append(utils.ReadFile(i))
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3907 result.Raise("Cannot transfer ssh keys to the new node")
3909 # Add node to our /etc/hosts, and add key to known_hosts
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911 # FIXME: this should be done via an rpc call to node daemon
3912 utils.AddHostToEtcHosts(new_node.name)
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927 # TODO: do a node-net-test as well?
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945 # make sure we redistribute the config
3946 self.cfg.Update(new_node, feedback_fn)
3947 # and make sure the new node will not have old files around
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
3959 class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node.
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3975 def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3986 # Boolean value that tells us whether we're offlining or draining the node
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
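# demoting this node may leave the cluster short of master candidates;
# with auto_promote we lock all nodes so that another node can be
# promoted in its place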
3994 self.lock_all = self.op.auto_promote and self.might_demote
3997 def ExpandNames(self):
3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003 def BuildHooksEnv(self):
4006 This runs on the master node.
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4015 nl = [self.cfg.GetMasterNode(),
4019 def CheckPrereq(self):
4020 """Check prerequisites.
4022 This checks the requested flag changes against the current node and cluster state.
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030 # we can't change the master's node flags
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039 # check if, after removing the current node, we would be short of master candidates
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4055 # If the node is being de-offlined or un-drained, promote it to master candidate if needed
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4064 def Exec(self, feedback_fn):
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4111 # if we locked all nodes, adjust the candidate pool before updating this node
4113 _AdjustCandidatePool(self, [node.name])
4115 # this will trigger configuration file update, if needed
4116 self.cfg.Update(node, feedback_fn)
4118 # this will trigger job queue propagation or cleanup
4120 self.context.ReaddNode(node)
4125 class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node.
4135 def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4138 raise errors.OpPrereqError("The node is the master and the force"
4139 " parameter was not set",
4142 def ExpandNames(self):
4143 """Locking for PowercycleNode.
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4149 self.needed_locks = {}
4151 def Exec(self, feedback_fn):
4155 result = self.rpc.call_node_powercycle(self.op.node_name,
4156 self.cfg.GetHypervisorType())
4157 result.Raise("Failed to schedule the reboot")
4158 return result.payload
4161 class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration.
4167 def ExpandNames(self):
4168 self.needed_locks = {}
4170 def Exec(self, feedback_fn):
4171 """Return cluster config.
4174 cluster = self.cfg.GetClusterInfo()
4177 # Filter just for enabled hypervisors
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
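# assemble the static and configuration-derived cluster values returned
# to the caller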
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4219 class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values.
4223 _OP_PARAMS = [_POutputFields]
4225 _FIELDS_DYNAMIC = utils.FieldSet()
4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229 def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC,
4231 dynamic=self._FIELDS_DYNAMIC,
4232 selected=self.op.output_fields)
4234 def ExpandNames(self):
4235 self.needed_locks = {}
4237 def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output.
4242 for field in self.op.output_fields:
4243 if field == "cluster_name":
4244 entry = self.cfg.GetClusterName()
4245 elif field == "master_node":
4246 entry = self.cfg.GetMasterNode()
4247 elif field == "drain_flag":
4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4249 elif field == "watcher_pause":
4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4252 raise errors.ParameterError(field)
4253 values.append(entry)
4257 class LUActivateInstanceDisks(NoHooksLU):
4258 """Bring up an instance's disks.
4263 ("ignore_size", False, _TBool),
4267 def ExpandNames(self):
4268 self._ExpandAndLockInstance()
4269 self.needed_locks[locking.LEVEL_NODE] = []
4270 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4272 def DeclareLocks(self, level):
4273 if level == locking.LEVEL_NODE:
4274 self._LockInstancesNodes()
4276 def CheckPrereq(self):
4277 """Check prerequisites.
4279 This checks that the instance is in the cluster.
4282 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4283 assert self.instance is not None, \
4284 "Cannot retrieve locked instance %s" % self.op.instance_name
4285 _CheckNodeOnline(self, self.instance.primary_node)
4287 def Exec(self, feedback_fn):
4288 """Activate the disks.
4291 disks_ok, disks_info = \
4292 _AssembleInstanceDisks(self, self.instance,
4293 ignore_size=self.op.ignore_size)
4295 raise errors.OpExecError("Cannot activate block devices")
4300 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4302 """Prepare the block devices for an instance.
4304 This sets up the block devices on all nodes.
4306 @type lu: L{LogicalUnit}
4307 @param lu: the logical unit on whose behalf we execute
4308 @type instance: L{objects.Instance}
4309 @param instance: the instance for whose disks we assemble
4310 @type disks: list of L{objects.Disk} or None
4311 @param disks: which disks to assemble (or all, if None)
4312 @type ignore_secondaries: boolean
4313 @param ignore_secondaries: if true, errors on secondary nodes
4314 won't result in an error return from the function
4315 @type ignore_size: boolean
4316 @param ignore_size: if true, the current known size of the disk
4317 will not be used during the disk activation, useful for cases
4318 when the size is wrong
4319 @return: a tuple (disks_ok, device_info); disks_ok is False if the
4320 operation failed, and device_info is a list of (host,
4321 instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
4326 iname = instance.name
4327 disks = _ExpandCheckDisks(instance, disks)
4329 # With the two-pass mechanism we try to reduce the window of
4330 # opportunity for the race condition of switching DRBD to primary
4331 # before handshaking has occurred, but we do not eliminate it
4333 # The proper fix would be to wait (with some limits) until the
4334 # connection has been made and drbd transitions from WFConnection
4335 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
4338 # 1st pass, assemble on all nodes in secondary mode
4339 for inst_disk in disks:
4340 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4342 node_disk = node_disk.Copy()
4343 node_disk.UnsetSize()
4344 lu.cfg.SetDiskID(node_disk, node)
4345 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4346 msg = result.fail_msg
4348 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4349 " (is_primary=False, pass=1): %s",
4350 inst_disk.iv_name, node, msg)
4351 if not ignore_secondaries:
4354 # FIXME: race condition on drbd migration to primary
4356 # 2nd pass, do only the primary node
4357 for inst_disk in disks:
4360 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4361 if node != instance.primary_node:
4364 node_disk = node_disk.Copy()
4365 node_disk.UnsetSize()
4366 lu.cfg.SetDiskID(node_disk, node)
4367 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4368 msg = result.fail_msg
4370 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4371 " (is_primary=True, pass=2): %s",
4372 inst_disk.iv_name, node, msg)
4375 dev_path = result.payload
4377 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4379 # leave the disks configured for the primary node
4380 # this is a workaround that would be fixed better by
4381 # improving the logical/physical id handling
4383 lu.cfg.SetDiskID(disk, instance.primary_node)
4385 return disks_ok, device_info
4388 def _StartInstanceDisks(lu, instance, force):
4389 """Start the disks of an instance.
4392 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4393 ignore_secondaries=force)
4395 _ShutdownInstanceDisks(lu, instance)
4396 if force is not None and not force:
4397 lu.proc.LogWarning("", hint="If the message above refers to a"
4399 " you can retry the operation using '--force'.")
4400 raise errors.OpExecError("Disk consistency error")
4403 class LUDeactivateInstanceDisks(NoHooksLU):
4404 """Shutdown an instance's disks.
4412 def ExpandNames(self):
4413 self._ExpandAndLockInstance()
4414 self.needed_locks[locking.LEVEL_NODE] = []
4415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4417 def DeclareLocks(self, level):
4418 if level == locking.LEVEL_NODE:
4419 self._LockInstancesNodes()
4421 def CheckPrereq(self):
4422 """Check prerequisites.
4424 This checks that the instance is in the cluster.
4427 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4428 assert self.instance is not None, \
4429 "Cannot retrieve locked instance %s" % self.op.instance_name
4431 def Exec(self, feedback_fn):
4432 """Deactivate the disks
4435 instance = self.instance
4436 _SafeShutdownInstanceDisks(self, instance)
4439 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4440 """Shutdown block devices of an instance.
4442 This function checks that the instance is not running before calling
4443 _ShutdownInstanceDisks.
4446 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4447 _ShutdownInstanceDisks(lu, instance, disks=disks)
4450 def _ExpandCheckDisks(instance, disks):
4451 """Return the instance disks selected by the disks list
4453 @type disks: list of L{objects.Disk} or None
4454 @param disks: selected disks
4455 @rtype: list of L{objects.Disk}
4456 @return: selected instance disks to act on
4460 return instance.disks
4462 if not set(disks).issubset(instance.disks):
4463 raise errors.ProgrammerError("Can only act on disks belonging to the"
4468 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4469 """Shutdown block devices of an instance.
4471 This does the shutdown on all nodes of the instance.
4473 If ignore_primary is false, errors on the primary node are reported as a failure; they are only ignored when ignore_primary is true.
4478 disks = _ExpandCheckDisks(instance, disks)
4481 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4482 lu.cfg.SetDiskID(top_disk, node)
4483 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4484 msg = result.fail_msg
4486 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4487 disk.iv_name, node, msg)
4488 if not ignore_primary or node != instance.primary_node:
4493 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4494 """Checks if a node has enough free memory.
4496 This function checks if a given node has the needed amount of free
4497 memory. In case the node has less memory or we cannot get the
4498 information from the node, this function raises an OpPrereqError.
4501 @type lu: C{LogicalUnit}
4502 @param lu: a logical unit from which we get configuration data
4504 @param node: the node to check
4505 @type reason: C{str}
4506 @param reason: string to use in the error message
4507 @type requested: C{int}
4508 @param requested: the amount of memory in MiB to check for
4509 @type hypervisor_name: C{str}
4510 @param hypervisor_name: the hypervisor to ask for memory stats
4511 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4512 we cannot check the node
4515 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4516 nodeinfo[node].Raise("Can't get data from node %s" % node,
4517 prereq=True, ecode=errors.ECODE_ENVIRON)
4518 free_mem = nodeinfo[node].payload.get('memory_free', None)
4519 if not isinstance(free_mem, int):
4520 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4521 " was '%s'" % (node, free_mem),
4522 errors.ECODE_ENVIRON)
4523 if requested > free_mem:
4524 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4525 " needed %s MiB, available %s MiB" %
4526 (node, reason, requested, free_mem),
4530 def _CheckNodesFreeDisk(lu, nodenames, requested):
4531 """Checks if nodes have enough free disk space in the default VG.
4533 This function checks if all given nodes have the needed amount of
4534 free disk. In case any node has less disk or we cannot get the
4535 information from the node, this function raises an OpPrereqError.
4538 @type lu: C{LogicalUnit}
4539 @param lu: a logical unit from which we get configuration data
4540 @type nodenames: C{list}
4541 @param nodenames: the list of node names to check
4542 @type requested: C{int}
4543 @param requested: the amount of disk in MiB to check for
4544 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4545 we cannot check the node
4548 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4549 lu.cfg.GetHypervisorType())
4550 for node in nodenames:
4551 info = nodeinfo[node]
4552 info.Raise("Cannot get current information from node %s" % node,
4553 prereq=True, ecode=errors.ECODE_ENVIRON)
4554 vg_free = info.payload.get("vg_free", None)
4555 if not isinstance(vg_free, int):
4556 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4557 " result was '%s'" % (node, vg_free),
4558 errors.ECODE_ENVIRON)
4559 if requested > vg_free:
4560 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4561 " required %d MiB, available %d MiB" %
4562 (node, requested, vg_free),
4566 class LUStartupInstance(LogicalUnit):
4567 """Starts an instance.
4570 HPATH = "instance-start"
4571 HTYPE = constants.HTYPE_INSTANCE
4575 ("hvparams", _EmptyDict, _TDict),
4576 ("beparams", _EmptyDict, _TDict),
4580 def CheckArguments(self):
4582 if self.op.beparams:
4583 # fill the beparams dict
4584 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4586 def ExpandNames(self):
4587 self._ExpandAndLockInstance()
4589 def BuildHooksEnv(self):
4592 This runs on master, primary and secondary nodes of the instance.
4596 "FORCE": self.op.force,
4598 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4599 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602 def CheckPrereq(self):
4603 """Check prerequisites.
4605 This checks that the instance is in the cluster.
4608 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4609 assert self.instance is not None, \
4610 "Cannot retrieve locked instance %s" % self.op.instance_name
4613 if self.op.hvparams:
4614 # check hypervisor parameter syntax (locally)
4615 cluster = self.cfg.GetClusterInfo()
4616 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4617 filled_hvp = cluster.FillHV(instance)
4618 filled_hvp.update(self.op.hvparams)
4619 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4620 hv_type.CheckParameterSyntax(filled_hvp)
4621 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4623 _CheckNodeOnline(self, instance.primary_node)
4625 bep = self.cfg.GetClusterInfo().FillBE(instance)
4626 # check bridges existence
4627 _CheckInstanceBridgesExist(self, instance)
4629 remote_info = self.rpc.call_instance_info(instance.primary_node,
4631 instance.hypervisor)
4632 remote_info.Raise("Error checking node %s" % instance.primary_node,
4633 prereq=True, ecode=errors.ECODE_ENVIRON)
4634 if not remote_info.payload: # not running already
4635 _CheckNodeFreeMemory(self, instance.primary_node,
4636 "starting instance %s" % instance.name,
4637 bep[constants.BE_MEMORY], instance.hypervisor)
4639 def Exec(self, feedback_fn):
4640 """Start the instance.
4643 instance = self.instance
4644 force = self.op.force
4646 self.cfg.MarkInstanceUp(instance.name)
4648 node_current = instance.primary_node
4650 _StartInstanceDisks(self, instance, force)
4652 result = self.rpc.call_instance_start(node_current, instance,
4653 self.op.hvparams, self.op.beparams)
4654 msg = result.fail_msg
4656 _ShutdownInstanceDisks(self, instance)
4657 raise errors.OpExecError("Could not start instance: %s" % msg)
4660 class LURebootInstance(LogicalUnit):
4661 """Reboot an instance.
4664 HPATH = "instance-reboot"
4665 HTYPE = constants.HTYPE_INSTANCE
4668 ("ignore_secondaries", False, _TBool),
4669 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4674 def ExpandNames(self):
4675 self._ExpandAndLockInstance()
4677 def BuildHooksEnv(self):
4680 This runs on master, primary and secondary nodes of the instance.
4684 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4685 "REBOOT_TYPE": self.op.reboot_type,
4686 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4688 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4689 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4692 def CheckPrereq(self):
4693 """Check prerequisites.
4695 This checks that the instance is in the cluster.
4698 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4699 assert self.instance is not None, \
4700 "Cannot retrieve locked instance %s" % self.op.instance_name
4702 _CheckNodeOnline(self, instance.primary_node)
4704 # check bridges existence
4705 _CheckInstanceBridgesExist(self, instance)
4707 def Exec(self, feedback_fn):
4708 """Reboot the instance.
4711 instance = self.instance
4712 ignore_secondaries = self.op.ignore_secondaries
4713 reboot_type = self.op.reboot_type
4715 node_current = instance.primary_node
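# soft and hard reboots are delegated to the hypervisor on the primary
# node; a full reboot is emulated as a shutdown, disk restart and fresh start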
4717 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4718 constants.INSTANCE_REBOOT_HARD]:
4719 for disk in instance.disks:
4720 self.cfg.SetDiskID(disk, node_current)
4721 result = self.rpc.call_instance_reboot(node_current, instance,
4723 self.op.shutdown_timeout)
4724 result.Raise("Could not reboot instance")
4726 result = self.rpc.call_instance_shutdown(node_current, instance,
4727 self.op.shutdown_timeout)
4728 result.Raise("Could not shutdown instance for full reboot")
4729 _ShutdownInstanceDisks(self, instance)
4730 _StartInstanceDisks(self, instance, ignore_secondaries)
4731 result = self.rpc.call_instance_start(node_current, instance, None, None)
4732 msg = result.fail_msg
4734 _ShutdownInstanceDisks(self, instance)
4735 raise errors.OpExecError("Could not start instance for"
4736 " full reboot: %s" % msg)
4738 self.cfg.MarkInstanceUp(instance.name)
4741 class LUShutdownInstance(LogicalUnit):
4742 """Shutdown an instance.
4745 HPATH = "instance-stop"
4746 HTYPE = constants.HTYPE_INSTANCE
4749 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4753 def ExpandNames(self):
4754 self._ExpandAndLockInstance()
4756 def BuildHooksEnv(self):
4759 This runs on master, primary and secondary nodes of the instance.
4762 env = _BuildInstanceHookEnvByObject(self, self.instance)
4763 env["TIMEOUT"] = self.op.timeout
4764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4767 def CheckPrereq(self):
4768 """Check prerequisites.
4770 This checks that the instance is in the cluster.
4773 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4774 assert self.instance is not None, \
4775 "Cannot retrieve locked instance %s" % self.op.instance_name
4776 _CheckNodeOnline(self, self.instance.primary_node)
4778 def Exec(self, feedback_fn):
4779 """Shutdown the instance.
4782 instance = self.instance
4783 node_current = instance.primary_node
4784 timeout = self.op.timeout
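# record the instance as administratively down in the configuration
# first, so other tools (e.g. the watcher) do not try to restart it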
4785 self.cfg.MarkInstanceDown(instance.name)
4786 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4787 msg = result.fail_msg
4789 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4791 _ShutdownInstanceDisks(self, instance)
4794 class LUReinstallInstance(LogicalUnit):
4795 """Reinstall an instance.
4798 HPATH = "instance-reinstall"
4799 HTYPE = constants.HTYPE_INSTANCE
4802 ("os_type", None, _TMaybeString),
4803 ("force_variant", False, _TBool),
4807 def ExpandNames(self):
4808 self._ExpandAndLockInstance()
4810 def BuildHooksEnv(self):
4813 This runs on master, primary and secondary nodes of the instance.
4816 env = _BuildInstanceHookEnvByObject(self, self.instance)
4817 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4820 def CheckPrereq(self):
4821 """Check prerequisites.
4823 This checks that the instance is in the cluster and is not running.
4826 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4827 assert instance is not None, \
4828 "Cannot retrieve locked instance %s" % self.op.instance_name
4829 _CheckNodeOnline(self, instance.primary_node)
4831 if instance.disk_template == constants.DT_DISKLESS:
4832 raise errors.OpPrereqError("Instance '%s' has no disks" %
4833 self.op.instance_name,
4835 _CheckInstanceDown(self, instance, "cannot reinstall")
4837 if self.op.os_type is not None:
4839 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4840 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4842 self.instance = instance
4844 def Exec(self, feedback_fn):
4845 """Reinstall the instance.
4848 inst = self.instance
4850 if self.op.os_type is not None:
4851 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4852 inst.os = self.op.os_type
4853 self.cfg.Update(inst, feedback_fn)
4855 _StartInstanceDisks(self, inst, None)
4857 feedback_fn("Running the instance OS create scripts...")
4858 # FIXME: pass debug option from opcode to backend
4859 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4860 self.op.debug_level)
4861 result.Raise("Could not install OS for instance %s on node %s" %
4862 (inst.name, inst.primary_node))
4864 _ShutdownInstanceDisks(self, inst)
4867 class LURecreateInstanceDisks(LogicalUnit):
4868 """Recreate an instance's missing disks.
4871 HPATH = "instance-recreate-disks"
4872 HTYPE = constants.HTYPE_INSTANCE
4875 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4879 def ExpandNames(self):
4880 self._ExpandAndLockInstance()
4882 def BuildHooksEnv(self):
4885 This runs on master, primary and secondary nodes of the instance.
4888 env = _BuildInstanceHookEnvByObject(self, self.instance)
4889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4892 def CheckPrereq(self):
4893 """Check prerequisites.
4895 This checks that the instance is in the cluster and is not running.
4898 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4899 assert instance is not None, \
4900 "Cannot retrieve locked instance %s" % self.op.instance_name
4901 _CheckNodeOnline(self, instance.primary_node)
4903 if instance.disk_template == constants.DT_DISKLESS:
4904 raise errors.OpPrereqError("Instance '%s' has no disks" %
4905 self.op.instance_name, errors.ECODE_INVAL)
4906 _CheckInstanceDown(self, instance, "cannot recreate disks")
4908 if not self.op.disks:
4909 self.op.disks = range(len(instance.disks))
4911 for idx in self.op.disks:
4912 if idx >= len(instance.disks):
4913 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4916 self.instance = instance
4918 def Exec(self, feedback_fn):
4919 """Recreate the disks.
4923 for idx, _ in enumerate(self.instance.disks):
4924 if idx not in self.op.disks: # disk idx has not been passed in
4928 _CreateDisks(self, self.instance, to_skip=to_skip)
4931 class LURenameInstance(LogicalUnit):
4932 """Rename an instance.
4935 HPATH = "instance-rename"
4936 HTYPE = constants.HTYPE_INSTANCE
4939 ("new_name", _NoDefault, _TNonEmptyString),
4940 ("ip_check", False, _TBool),
4941 ("name_check", True, _TBool),
4944 def CheckArguments(self):
4948 if self.op.ip_check and not self.op.name_check:
4949 # TODO: make the ip check more flexible and not depend on the name check
4950 raise errors.OpPrereqError("Cannot do ip check without a name check",
4953 def BuildHooksEnv(self):
4956 This runs on master, primary and secondary nodes of the instance.
4959 env = _BuildInstanceHookEnvByObject(self, self.instance)
4960 env["INSTANCE_NEW_NAME"] = self.op.new_name
4961 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4964 def CheckPrereq(self):
4965 """Check prerequisites.
4967 This checks that the instance is in the cluster and is not running.
4970 self.op.instance_name = _ExpandInstanceName(self.cfg,
4971 self.op.instance_name)
4972 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4973 assert instance is not None
4974 _CheckNodeOnline(self, instance.primary_node)
4975 _CheckInstanceDown(self, instance, "cannot rename")
4976 self.instance = instance
4978 new_name = self.op.new_name
4979 if self.op.name_check:
4980 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4981 new_name = hostinfo.name
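# with ip_check enabled, make sure the address the new name resolves to
# is not already live on the network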
4982 if (self.op.ip_check and
4983 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4984 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4985 (hostinfo.ip, new_name),
4986 errors.ECODE_NOTUNIQUE)
4988 instance_list = self.cfg.GetInstanceList()
4989 if new_name in instance_list:
4990 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4991 new_name, errors.ECODE_EXISTS)
4994 def Exec(self, feedback_fn):
4995 """Reinstall the instance.
4998 inst = self.instance
4999 old_name = inst.name
5001 if inst.disk_template == constants.DT_FILE:
5002 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
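# remember the old file-storage directory; it has to be renamed on the
# primary node once the instance carries its new name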
5004 self.cfg.RenameInstance(inst.name, self.op.new_name)
5005 # Change the instance lock. This is definitely safe while we hold the BGL
5006 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5007 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5009 # re-read the instance from the configuration after rename
5010 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5012 if inst.disk_template == constants.DT_FILE:
5013 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5014 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5015 old_file_storage_dir,
5016 new_file_storage_dir)
5017 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5018 " (but the instance has been renamed in Ganeti)" %
5019 (inst.primary_node, old_file_storage_dir,
5020 new_file_storage_dir))
5022 _StartInstanceDisks(self, inst, None)
5024 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5025 old_name, self.op.debug_level)
5026 msg = result.fail_msg
5028 msg = ("Could not run OS rename script for instance %s on node %s"
5029 " (but the instance has been renamed in Ganeti): %s" %
5030 (inst.name, inst.primary_node, msg))
5031 self.proc.LogWarning(msg)
5033 _ShutdownInstanceDisks(self, inst)
5038 class LURemoveInstance(LogicalUnit):
5039 """Remove an instance.
5042 HPATH = "instance-remove"
5043 HTYPE = constants.HTYPE_INSTANCE
5046 ("ignore_failures", False, _TBool),
5051 def ExpandNames(self):
5052 self._ExpandAndLockInstance()
5053 self.needed_locks[locking.LEVEL_NODE] = []
5054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5056 def DeclareLocks(self, level):
5057 if level == locking.LEVEL_NODE:
5058 self._LockInstancesNodes()
5060 def BuildHooksEnv(self):
5063 This runs on master, primary and secondary nodes of the instance.
5066 env = _BuildInstanceHookEnvByObject(self, self.instance)
5067 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5068 nl = [self.cfg.GetMasterNode()]
5069 nl_post = list(self.instance.all_nodes) + nl
5070 return env, nl, nl_post
5072 def CheckPrereq(self):
5073 """Check prerequisites.
5075 This checks that the instance is in the cluster.
5078 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5079 assert self.instance is not None, \
5080 "Cannot retrieve locked instance %s" % self.op.instance_name
5082 def Exec(self, feedback_fn):
5083 """Remove the instance.
5086 instance = self.instance
5087 logging.info("Shutting down instance %s on node %s",
5088 instance.name, instance.primary_node)
5090 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5091 self.op.shutdown_timeout)
5092 msg = result.fail_msg
5094 if self.op.ignore_failures:
5095 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5097 raise errors.OpExecError("Could not shutdown instance %s on"
5099 (instance.name, instance.primary_node, msg))
5101 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5104 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5105 """Utility function to remove an instance.
5108 logging.info("Removing block devices for instance %s", instance.name)
5110 if not _RemoveDisks(lu, instance):
5111 if not ignore_failures:
5112 raise errors.OpExecError("Can't remove instance's disks")
5113 feedback_fn("Warning: can't remove instance's disks")
5115 logging.info("Removing instance %s out of cluster config", instance.name)
5117 lu.cfg.RemoveInstance(instance.name)
5119 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5120 "Instance lock removal conflict"
5122 # Remove lock for the instance
5123 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5126 class LUQueryInstances(NoHooksLU):
5127 """Logical unit for querying instances.
5130 # pylint: disable-msg=W0142
5132 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5133 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5134 ("use_locking", False, _TBool),
5137 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5138 "serial_no", "ctime", "mtime", "uuid"]
5139 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5141 "disk_template", "ip", "mac", "bridge",
5142 "nic_mode", "nic_link",
5143 "sda_size", "sdb_size", "vcpus", "tags",
5144 "network_port", "beparams",
5145 r"(disk)\.(size)/([0-9]+)",
5146 r"(disk)\.(sizes)", "disk_usage",
5147 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5148 r"(nic)\.(bridge)/([0-9]+)",
5149 r"(nic)\.(macs|ips|modes|links|bridges)",
5150 r"(disk|nic)\.(count)",
5152 ] + _SIMPLE_FIELDS +
5154 for name in constants.HVS_PARAMETERS
5155 if name not in constants.HVC_GLOBALS] +
5157 for name in constants.BES_PARAMETERS])
5158 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5164 def CheckArguments(self):
5165 _CheckOutputFields(static=self._FIELDS_STATIC,
5166 dynamic=self._FIELDS_DYNAMIC,
5167 selected=self.op.output_fields)
5169 def ExpandNames(self):
5170 self.needed_locks = {}
5171 self.share_locks[locking.LEVEL_INSTANCE] = 1
5172 self.share_locks[locking.LEVEL_NODE] = 1
5175 self.wanted = _GetWantedInstances(self, self.op.names)
5177 self.wanted = locking.ALL_SET
5179 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5180 self.do_locking = self.do_node_query and self.op.use_locking
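# locking (and thus live node queries) is only needed when non-static
# fields were requested and the caller asked for locking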
5182 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5183 self.needed_locks[locking.LEVEL_NODE] = []
5184 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5186 def DeclareLocks(self, level):
5187 if level == locking.LEVEL_NODE and self.do_locking:
5188 self._LockInstancesNodes()
5190 def Exec(self, feedback_fn):
5191 """Computes the list of nodes and their attributes.
5194 # pylint: disable-msg=R0912
5195 # way too many branches here
5196 all_info = self.cfg.GetAllInstancesInfo()
5197 if self.wanted == locking.ALL_SET:
5198 # caller didn't specify instance names, so ordering is not important
5200 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5202 instance_names = all_info.keys()
5203 instance_names = utils.NiceSort(instance_names)
5205 # caller did specify names, so we must keep the ordering
5207 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5209 tgt_set = all_info.keys()
5210 missing = set(self.wanted).difference(tgt_set)
5212 raise errors.OpExecError("Some instances were removed before"
5213 " retrieving their data: %s" % missing)
5214 instance_names = self.wanted
5216 instance_list = [all_info[iname] for iname in instance_names]
5218 # begin data gathering
5220 nodes = frozenset([inst.primary_node for inst in instance_list])
5221 hv_list = list(set([inst.hypervisor for inst in instance_list]))
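# live data has to be fetched from the primary nodes; each node is
# queried once for the state of all its instances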
5225 if self.do_node_query:
5227 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5229 result = node_data[name]
5231 # offline nodes will be in both lists
5232 off_nodes.append(name)
5234 bad_nodes.append(name)
5237 live_data.update(result.payload)
5238 # else no instance is alive
5240 live_data = dict([(name, {}) for name in instance_names])
5242 # end data gathering
5247 cluster = self.cfg.GetClusterInfo()
5248 for instance in instance_list:
5250 i_hv = cluster.FillHV(instance, skip_globals=True)
5251 i_be = cluster.FillBE(instance)
5252 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
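# using the filled hypervisor/backend/NIC parameter dicts above, compute
# each requested output field for this instance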
5253 for field in self.op.output_fields:
5254 st_match = self._FIELDS_STATIC.Matches(field)
5255 if field in self._SIMPLE_FIELDS:
5256 val = getattr(instance, field)
5257 elif field == "pnode":
5258 val = instance.primary_node
5259 elif field == "snodes":
5260 val = list(instance.secondary_nodes)
5261 elif field == "admin_state":
5262 val = instance.admin_up
5263 elif field == "oper_state":
5264 if instance.primary_node in bad_nodes:
5267 val = bool(live_data.get(instance.name))
5268 elif field == "status":
5269 if instance.primary_node in off_nodes:
5270 val = "ERROR_nodeoffline"
5271 elif instance.primary_node in bad_nodes:
5272 val = "ERROR_nodedown"
5274 running = bool(live_data.get(instance.name))
5276 if instance.admin_up:
5281 if instance.admin_up:
5285 elif field == "oper_ram":
5286 if instance.primary_node in bad_nodes:
5288 elif instance.name in live_data:
5289 val = live_data[instance.name].get("memory", "?")
5292 elif field == "oper_vcpus":
5293 if instance.primary_node in bad_nodes:
5295 elif instance.name in live_data:
5296 val = live_data[instance.name].get("vcpus", "?")
5299 elif field == "vcpus":
5300 val = i_be[constants.BE_VCPUS]
5301 elif field == "disk_template":
5302 val = instance.disk_template
5305 val = instance.nics[0].ip
5308 elif field == "nic_mode":
5310 val = i_nicp[0][constants.NIC_MODE]
5313 elif field == "nic_link":
5315 val = i_nicp[0][constants.NIC_LINK]
5318 elif field == "bridge":
5319 if (instance.nics and
5320 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5321 val = i_nicp[0][constants.NIC_LINK]
5324 elif field == "mac":
5326 val = instance.nics[0].mac
5329 elif field == "sda_size" or field == "sdb_size":
5330 idx = ord(field[2]) - ord('a')
5332 val = instance.FindDisk(idx).size
5333 except errors.OpPrereqError:
5335 elif field == "disk_usage": # total disk usage per node
5336 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5337 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5338 elif field == "tags":
5339 val = list(instance.GetTags())
5340 elif field == "hvparams":
5342 elif (field.startswith(HVPREFIX) and
5343 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5344 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5345 val = i_hv.get(field[len(HVPREFIX):], None)
5346 elif field == "beparams":
5348 elif (field.startswith(BEPREFIX) and
5349 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5350 val = i_be.get(field[len(BEPREFIX):], None)
5351 elif st_match and st_match.groups():
5352 # matches a variable list
5353 st_groups = st_match.groups()
5354 if st_groups and st_groups[0] == "disk":
5355 if st_groups[1] == "count":
5356 val = len(instance.disks)
5357 elif st_groups[1] == "sizes":
5358 val = [disk.size for disk in instance.disks]
5359 elif st_groups[1] == "size":
5361 val = instance.FindDisk(st_groups[2]).size
5362 except errors.OpPrereqError:
5365 assert False, "Unhandled disk parameter"
5366 elif st_groups[0] == "nic":
5367 if st_groups[1] == "count":
5368 val = len(instance.nics)
5369 elif st_groups[1] == "macs":
5370 val = [nic.mac for nic in instance.nics]
5371 elif st_groups[1] == "ips":
5372 val = [nic.ip for nic in instance.nics]
5373 elif st_groups[1] == "modes":
5374 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5375 elif st_groups[1] == "links":
5376 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5377 elif st_groups[1] == "bridges":
5380 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5381 val.append(nicp[constants.NIC_LINK])
5386 nic_idx = int(st_groups[2])
5387 if nic_idx >= len(instance.nics):
5390 if st_groups[1] == "mac":
5391 val = instance.nics[nic_idx].mac
5392 elif st_groups[1] == "ip":
5393 val = instance.nics[nic_idx].ip
5394 elif st_groups[1] == "mode":
5395 val = i_nicp[nic_idx][constants.NIC_MODE]
5396 elif st_groups[1] == "link":
5397 val = i_nicp[nic_idx][constants.NIC_LINK]
5398 elif st_groups[1] == "bridge":
5399 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5400 if nic_mode == constants.NIC_MODE_BRIDGED:
5401 val = i_nicp[nic_idx][constants.NIC_LINK]
5405 assert False, "Unhandled NIC parameter"
5407 assert False, ("Declared but unhandled variable parameter '%s'" %
5410 assert False, "Declared but unhandled parameter '%s'" % field
5417 class LUFailoverInstance(LogicalUnit):
5418 """Failover an instance.
5421 HPATH = "instance-failover"
5422 HTYPE = constants.HTYPE_INSTANCE
5425 ("ignore_consistency", False, _TBool),
5430 def ExpandNames(self):
5431 self._ExpandAndLockInstance()
5432 self.needed_locks[locking.LEVEL_NODE] = []
5433 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5435 def DeclareLocks(self, level):
5436 if level == locking.LEVEL_NODE:
5437 self._LockInstancesNodes()
5439 def BuildHooksEnv(self):
5442 This runs on master, primary and secondary nodes of the instance.
5445 instance = self.instance
5446 source_node = instance.primary_node
5447 target_node = instance.secondary_nodes[0]
5449 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5450 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5451 "OLD_PRIMARY": source_node,
5452 "OLD_SECONDARY": target_node,
5453 "NEW_PRIMARY": target_node,
5454 "NEW_SECONDARY": source_node,
5456 env.update(_BuildInstanceHookEnvByObject(self, instance))
5457 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5459 nl_post.append(source_node)
5460 return env, nl, nl_post
5462 def CheckPrereq(self):
5463 """Check prerequisites.
5465 This checks that the instance is in the cluster.
5468 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5469 assert self.instance is not None, \
5470 "Cannot retrieve locked instance %s" % self.op.instance_name
5472 bep = self.cfg.GetClusterInfo().FillBE(instance)
5473 if instance.disk_template not in constants.DTS_NET_MIRROR:
5474 raise errors.OpPrereqError("Instance's disk layout is not"
5475 " network mirrored, cannot failover.",
5478 secondary_nodes = instance.secondary_nodes
5479 if not secondary_nodes:
5480 raise errors.ProgrammerError("no secondary node but using "
5481 "a mirrored disk template")
5483 target_node = secondary_nodes[0]
5484 _CheckNodeOnline(self, target_node)
5485 _CheckNodeNotDrained(self, target_node)
5486 if instance.admin_up:
5487 # check memory requirements on the secondary node
5488 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5489 instance.name, bep[constants.BE_MEMORY],
5490 instance.hypervisor)
5492 self.LogInfo("Not checking memory on the secondary node as"
5493 " instance will not be started")
5495 # check bridge existence
5496 _CheckInstanceBridgesExist(self, instance, node=target_node)
5498 def Exec(self, feedback_fn):
5499 """Failover an instance.
5501 The failover is done by shutting it down on its present node and
5502 starting it on the secondary.
5505 instance = self.instance
5507 source_node = instance.primary_node
5508 target_node = instance.secondary_nodes[0]
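# for a mirrored (DRBD) instance, failover means the current secondary
# becomes the new primary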
5510 if instance.admin_up:
5511 feedback_fn("* checking disk consistency between source and target")
5512 for dev in instance.disks:
5513 # for drbd, these are drbd over lvm
5514 if not _CheckDiskConsistency(self, dev, target_node, False):
5515 if not self.op.ignore_consistency:
5516 raise errors.OpExecError("Disk %s is degraded on target node,"
5517 " aborting failover." % dev.iv_name)
5519 feedback_fn("* not checking disk consistency as instance is not running")
5521 feedback_fn("* shutting down instance on source node")
5522 logging.info("Shutting down instance %s on node %s",
5523 instance.name, source_node)
5525 result = self.rpc.call_instance_shutdown(source_node, instance,
5526 self.op.shutdown_timeout)
5527 msg = result.fail_msg
5529 if self.op.ignore_consistency:
5530 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5531 " Proceeding anyway. Please make sure node"
5532 " %s is down. Error details: %s",
5533 instance.name, source_node, source_node, msg)
5535 raise errors.OpExecError("Could not shutdown instance %s on"
5537 (instance.name, source_node, msg))
5539 feedback_fn("* deactivating the instance's disks on source node")
5540 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5541 raise errors.OpExecError("Can't shut down the instance's disks.")
5543 instance.primary_node = target_node
5544 # distribute new instance config to the other nodes
5545 self.cfg.Update(instance, feedback_fn)
5547 # Only start the instance if it's marked as up
5548 if instance.admin_up:
5549 feedback_fn("* activating the instance's disks on target node")
5550 logging.info("Starting instance %s on node %s",
5551 instance.name, target_node)
5553 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5554 ignore_secondaries=True)
5556 _ShutdownInstanceDisks(self, instance)
5557 raise errors.OpExecError("Can't activate the instance's disks")
5559 feedback_fn("* starting the instance on the target node")
5560 result = self.rpc.call_instance_start(target_node, instance, None, None)
5561 msg = result.fail_msg
5563 _ShutdownInstanceDisks(self, instance)
5564 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5565 (instance.name, target_node, msg))
5568 class LUMigrateInstance(LogicalUnit):
5569 """Migrate an instance.
5571 This is migration without shutting the instance down, as opposed to
5572 failover, which requires a shutdown.
5575 HPATH = "instance-migrate"
5576 HTYPE = constants.HTYPE_INSTANCE
5581 ("cleanup", False, _TBool),
5586 def ExpandNames(self):
5587 self._ExpandAndLockInstance()
5589 self.needed_locks[locking.LEVEL_NODE] = []
5590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5592 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5594 self.tasklets = [self._migrater]
5596 def DeclareLocks(self, level):
5597 if level == locking.LEVEL_NODE:
5598 self._LockInstancesNodes()
5600 def BuildHooksEnv(self):
5603 This runs on master, primary and secondary nodes of the instance.
5606 instance = self._migrater.instance
5607 source_node = instance.primary_node
5608 target_node = instance.secondary_nodes[0]
5609 env = _BuildInstanceHookEnvByObject(self, instance)
5610 env["MIGRATE_LIVE"] = self._migrater.live
5611 env["MIGRATE_CLEANUP"] = self.op.cleanup
5613 "OLD_PRIMARY": source_node,
5614 "OLD_SECONDARY": target_node,
5615 "NEW_PRIMARY": target_node,
5616 "NEW_SECONDARY": source_node,
5618 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5620 nl_post.append(source_node)
5621 return env, nl, nl_post
5624 class LUMoveInstance(LogicalUnit):
5625 """Move an instance by data-copying.
5628 HPATH = "instance-move"
5629 HTYPE = constants.HTYPE_INSTANCE
5632 ("target_node", _NoDefault, _TNonEmptyString),
5637 def ExpandNames(self):
5638 self._ExpandAndLockInstance()
5639 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5640 self.op.target_node = target_node
5641 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5644 def DeclareLocks(self, level):
5645 if level == locking.LEVEL_NODE:
5646 self._LockInstancesNodes(primary_only=True)
5648 def BuildHooksEnv(self):
5651 This runs on master, primary and secondary nodes of the instance.
5655 "TARGET_NODE": self.op.target_node,
5656 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5658 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5659 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5660 self.op.target_node]
5663 def CheckPrereq(self):
5664 """Check prerequisites.
5666 This checks that the instance is in the cluster.
5669 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5670 assert self.instance is not None, \
5671 "Cannot retrieve locked instance %s" % self.op.instance_name
5673 node = self.cfg.GetNodeInfo(self.op.target_node)
5674 assert node is not None, \
5675 "Cannot retrieve locked node %s" % self.op.target_node
5677 self.target_node = target_node = node.name
5679 if target_node == instance.primary_node:
5680 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5681 (instance.name, target_node),
5684 bep = self.cfg.GetClusterInfo().FillBE(instance)
5686 for idx, dsk in enumerate(instance.disks):
5687 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5688 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5689 " cannot copy" % idx, errors.ECODE_STATE)
5691 _CheckNodeOnline(self, target_node)
5692 _CheckNodeNotDrained(self, target_node)
5694 if instance.admin_up:
5695 # check memory requirements on the target node
5696 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5697 instance.name, bep[constants.BE_MEMORY],
5698 instance.hypervisor)
5700 self.LogInfo("Not checking memory on the secondary node as"
5701 " instance will not be started")
5703 # check bridge existence
5704 _CheckInstanceBridgesExist(self, instance, node=target_node)
5706 def Exec(self, feedback_fn):
5707 """Move an instance.
5709 The move is done by shutting it down on its present node, copying
5710 the data over (slow) and starting it on the new node.
5713 instance = self.instance
5715 source_node = instance.primary_node
5716 target_node = self.target_node
5718 self.LogInfo("Shutting down instance %s on source node %s",
5719 instance.name, source_node)
5721 result = self.rpc.call_instance_shutdown(source_node, instance,
5722 self.op.shutdown_timeout)
5723 msg = result.fail_msg
5725 if self.op.ignore_consistency:
5726 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5727 " Proceeding anyway. Please make sure node"
5728 " %s is down. Error details: %s",
5729 instance.name, source_node, source_node, msg)
5731 raise errors.OpExecError("Could not shutdown instance %s on"
5733 (instance.name, source_node, msg))
5735 # create the target disks
5737 _CreateDisks(self, instance, target_node=target_node)
5738 except errors.OpExecError:
5739 self.LogWarning("Device creation failed, reverting...")
5741 _RemoveDisks(self, instance, target_node=target_node)
5743 self.cfg.ReleaseDRBDMinors(instance.name)
5746 cluster_name = self.cfg.GetClusterInfo().cluster_name
5749 # activate, get path, copy the data over
5750 for idx, disk in enumerate(instance.disks):
5751 self.LogInfo("Copying data for disk %d", idx)
5752 result = self.rpc.call_blockdev_assemble(target_node, disk,
5753 instance.name, True)
5755 self.LogWarning("Can't assemble newly created disk %d: %s",
5756 idx, result.fail_msg)
5757 errs.append(result.fail_msg)
5759 dev_path = result.payload
5760 result = self.rpc.call_blockdev_export(source_node, disk,
5761 target_node, dev_path,
5764 self.LogWarning("Can't copy data over for disk %d: %s",
5765 idx, result.fail_msg)
5766 errs.append(result.fail_msg)
5770 self.LogWarning("Some disks failed to copy, aborting")
5772 _RemoveDisks(self, instance, target_node=target_node)
5774 self.cfg.ReleaseDRBDMinors(instance.name)
5775 raise errors.OpExecError("Errors during disk copy: %s" %
5778 instance.primary_node = target_node
5779 self.cfg.Update(instance, feedback_fn)
5781 self.LogInfo("Removing the disks on the original node")
5782 _RemoveDisks(self, instance, target_node=source_node)
5784 # Only start the instance if it's marked as up
5785 if instance.admin_up:
5786 self.LogInfo("Starting instance %s on node %s",
5787 instance.name, target_node)
5789 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5790 ignore_secondaries=True)
5792 _ShutdownInstanceDisks(self, instance)
5793 raise errors.OpExecError("Can't activate the instance's disks")
5795 result = self.rpc.call_instance_start(target_node, instance, None, None)
5796 msg = result.fail_msg
5798 _ShutdownInstanceDisks(self, instance)
5799 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5800 (instance.name, target_node, msg))
5803 class LUMigrateNode(LogicalUnit):
5804 """Migrate all instances from a node.
5807 HPATH = "node-migrate"
5808 HTYPE = constants.HTYPE_NODE
5816 def ExpandNames(self):
5817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5819 self.needed_locks = {
5820 locking.LEVEL_NODE: [self.op.node_name],
5823 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5825 # Create one migration tasklet for each instance whose primary node is this node
5829 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5830 logging.debug("Migrating instance %s", inst.name)
5831 names.append(inst.name)
5833 tasklets.append(TLMigrateInstance(self, inst.name, False))
5835 self.tasklets = tasklets
5837 # Declare instance locks
5838 self.needed_locks[locking.LEVEL_INSTANCE] = names
5840 def DeclareLocks(self, level):
5841 if level == locking.LEVEL_NODE:
5842 self._LockInstancesNodes()
5844 def BuildHooksEnv(self):
5847 This runs on the master, the primary and all the secondaries.
5851 "NODE_NAME": self.op.node_name,
5854 nl = [self.cfg.GetMasterNode()]
5856 return (env, nl, nl)
5859 class TLMigrateInstance(Tasklet):
5860 """Tasklet class for instance migration.
5863 @ivar live: whether the migration will be done live or non-live;
5864 this variable is initialized only after CheckPrereq has run
5867 def __init__(self, lu, instance_name, cleanup):
5868 """Initializes this class.
5871 Tasklet.__init__(self, lu)
5874 self.instance_name = instance_name
5875 self.cleanup = cleanup
5876 self.live = False # will be overridden later
5878 def CheckPrereq(self):
5879 """Check prerequisites.
5881 This checks that the instance is in the cluster.
5884 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5885 instance = self.cfg.GetInstanceInfo(instance_name)
5886 assert instance is not None
5888 if instance.disk_template != constants.DT_DRBD8:
5889 raise errors.OpPrereqError("Instance's disk layout is not"
5890 " drbd8, cannot migrate.", errors.ECODE_STATE)
5892 secondary_nodes = instance.secondary_nodes
5893 if not secondary_nodes:
5894 raise errors.ConfigurationError("No secondary node but using"
5895 " drbd8 disk template")
5897 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5899 target_node = secondary_nodes[0]
5900 # check memory requirements on the secondary node
5901 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5902 instance.name, i_be[constants.BE_MEMORY],
5903 instance.hypervisor)
5905 # check bridge existence
5906 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5908 if not self.cleanup:
5909 _CheckNodeNotDrained(self.lu, target_node)
5910 result = self.rpc.call_instance_migratable(instance.primary_node,
5912 result.Raise("Can't migrate, please use failover",
5913 prereq=True, ecode=errors.ECODE_STATE)
5915 self.instance = instance
5917 if self.lu.op.live is not None and self.lu.op.mode is not None:
5918 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5919 " parameters is accepted",
5921 if self.lu.op.live is not None:
5923 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5925 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5926 # reset the 'live' parameter to None so that repeated
5927 # invocations of CheckPrereq do not raise an exception
5928 self.lu.op.live = None
5929 elif self.lu.op.mode is None:
5930 # read the default value from the hypervisor
5931 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5932 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5934 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
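# Summary of the live/mode resolution above (editorial note, not part of the
# original source):
#   op.live is True,  op.mode is None -> mode = HT_MIGRATION_LIVE
#   op.live is False, op.mode is None -> mode = HT_MIGRATION_NONLIVE
#   op.live is None,  op.mode is None -> mode taken from the hypervisor's
#                                        HV_MIGRATION_MODE default
#   op.live and op.mode both given    -> OpPrereqError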
5936 def _WaitUntilSync(self):
5937 """Poll with custom rpc for disk sync.
5939 This uses our own step-based rpc call.
5942 self.feedback_fn("* wait until resync is done")
5946 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5948 self.instance.disks)
5950 for node, nres in result.items():
5951 nres.Raise("Cannot resync disks on node %s" % node)
5952 node_done, node_percent = nres.payload
5953 all_done = all_done and node_done
5954 if node_percent is not None:
5955 min_percent = min(min_percent, node_percent)
5957 if min_percent < 100:
5958 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5961 def _EnsureSecondary(self, node):
5962 """Demote a node to secondary.
5965 self.feedback_fn("* switching node %s to secondary mode" % node)
5967 for dev in self.instance.disks:
5968 self.cfg.SetDiskID(dev, node)
5970 result = self.rpc.call_blockdev_close(node, self.instance.name,
5971 self.instance.disks)
5972 result.Raise("Cannot change disk to secondary on node %s" % node)
5974 def _GoStandalone(self):
5975 """Disconnect from the network.
5978 self.feedback_fn("* changing into standalone mode")
5979 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5980 self.instance.disks)
5981 for node, nres in result.items():
5982 nres.Raise("Cannot disconnect disks on node %s" % node)
5984 def _GoReconnect(self, multimaster):
5985 """Reconnect to the network.
5991 msg = "single-master"
5992 self.feedback_fn("* changing disks into %s mode" % msg)
5993 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5994 self.instance.disks,
5995 self.instance.name, multimaster)
5996 for node, nres in result.items():
5997 nres.Raise("Cannot change disks config on node %s" % node)
5999 def _ExecCleanup(self):
6000 """Try to clean up after a failed migration.
6002 The cleanup is done by:
6003 - check that the instance is running only on one node
6004 (and update the config if needed)
6005 - change disks on its secondary node to secondary
6006 - wait until disks are fully synchronized
6007 - disconnect from the network
6008 - change disks into single-master mode
6009 - wait again until disks are fully synchronized
6012 instance = self.instance
6013 target_node = self.target_node
6014 source_node = self.source_node
6016 # check running on only one node
6017 self.feedback_fn("* checking where the instance actually runs"
6018 " (if this hangs, the hypervisor might be in"
6020 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6021 for node, result in ins_l.items():
6022 result.Raise("Can't contact node %s" % node)
6024 runningon_source = instance.name in ins_l[source_node].payload
6025 runningon_target = instance.name in ins_l[target_node].payload
6027 if runningon_source and runningon_target:
6028 raise errors.OpExecError("Instance seems to be running on two nodes,"
6029 " or the hypervisor is confused. You will have"
6030 " to ensure manually that it runs only on one"
6031 " and restart this operation.")
6033 if not (runningon_source or runningon_target):
6034 raise errors.OpExecError("Instance does not seem to be running at all."
6035 " In this case, it's safer to repair by"
6036 " running 'gnt-instance stop' to ensure disk"
6037 " shutdown, and then restarting it.")
6039 if runningon_target:
6040 # the migration has actually succeeded, we need to update the config
6041 self.feedback_fn("* instance running on secondary node (%s),"
6042 " updating config" % target_node)
6043 instance.primary_node = target_node
6044 self.cfg.Update(instance, self.feedback_fn)
6045 demoted_node = source_node
6047 self.feedback_fn("* instance confirmed to be running on its"
6048 " primary node (%s)" % source_node)
6049 demoted_node = target_node
6051 self._EnsureSecondary(demoted_node)
6053 self._WaitUntilSync()
6054 except errors.OpExecError:
6055 # we ignore errors here, since if the device is standalone, it
6056 # won't be able to sync
6058 self._GoStandalone()
6059 self._GoReconnect(False)
6060 self._WaitUntilSync()
6062 self.feedback_fn("* done")
6064 def _RevertDiskStatus(self):
6065 """Try to revert the disk status after a failed migration.
6068 target_node = self.target_node
6070 self._EnsureSecondary(target_node)
6071 self._GoStandalone()
6072 self._GoReconnect(False)
6073 self._WaitUntilSync()
6074 except errors.OpExecError, err:
6075 self.lu.LogWarning("Migration failed and I can't reconnect the"
6076 " drives: error '%s'\n"
6077 "Please look and recover the instance status" %
6080 def _AbortMigration(self):
6081 """Call the hypervisor code to abort a started migration.
6084 instance = self.instance
6085 target_node = self.target_node
6086 migration_info = self.migration_info
6088 abort_result = self.rpc.call_finalize_migration(target_node,
6092 abort_msg = abort_result.fail_msg
6094 logging.error("Aborting migration failed on target node %s: %s",
6095 target_node, abort_msg)
6096 # Don't raise an exception here, as we still have to try to revert the
6097 # disk status, even if this step failed.
6099 def _ExecMigration(self):
6100 """Migrate an instance.
6102 The migration is done by:
6103 - change the disks into dual-master mode
6104 - wait until disks are fully synchronized again
6105 - migrate the instance
6106 - change disks on the new secondary node (the old primary) to secondary
6107 - wait until disks are fully synchronized
6108 - change disks into single-master mode
6111 instance = self.instance
6112 target_node = self.target_node
6113 source_node = self.source_node
6115 self.feedback_fn("* checking disk consistency between source and target")
6116 for dev in instance.disks:
6117 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6118 raise errors.OpExecError("Disk %s is degraded or not fully"
6119 " synchronized on target node,"
6120 " aborting migration." % dev.iv_name)
6122 # First get the migration information from the remote node
6123 result = self.rpc.call_migration_info(source_node, instance)
6124 msg = result.fail_msg
6126 log_err = ("Failed fetching source migration information from %s: %s" %
6128 logging.error(log_err)
6129 raise errors.OpExecError(log_err)
6131 self.migration_info = migration_info = result.payload
6133 # Then switch the disks to master/master mode
6134 self._EnsureSecondary(target_node)
6135 self._GoStandalone()
6136 self._GoReconnect(True)
6137 self._WaitUntilSync()
6139 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6140 result = self.rpc.call_accept_instance(target_node,
6143 self.nodes_ip[target_node])
6145 msg = result.fail_msg
6147 logging.error("Instance pre-migration failed, trying to revert"
6148 " disk status: %s", msg)
6149 self.feedback_fn("Pre-migration failed, aborting")
6150 self._AbortMigration()
6151 self._RevertDiskStatus()
6152 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6153 (instance.name, msg))
6155 self.feedback_fn("* migrating instance to %s" % target_node)
6157 result = self.rpc.call_instance_migrate(source_node, instance,
6158 self.nodes_ip[target_node],
6160 msg = result.fail_msg
6162 logging.error("Instance migration failed, trying to revert"
6163 " disk status: %s", msg)
6164 self.feedback_fn("Migration failed, aborting")
6165 self._AbortMigration()
6166 self._RevertDiskStatus()
6167 raise errors.OpExecError("Could not migrate instance %s: %s" %
6168 (instance.name, msg))
6171 instance.primary_node = target_node
6172 # distribute new instance config to the other nodes
6173 self.cfg.Update(instance, self.feedback_fn)
6175 result = self.rpc.call_finalize_migration(target_node,
6179 msg = result.fail_msg
6181 logging.error("Instance migration succeeded, but finalization failed:"
6183 raise errors.OpExecError("Could not finalize instance migration: %s" %
6186 self._EnsureSecondary(source_node)
6187 self._WaitUntilSync()
6188 self._GoStandalone()
6189 self._GoReconnect(False)
6190 self._WaitUntilSync()
6192 self.feedback_fn("* done")
6194 def Exec(self, feedback_fn):
6195 """Perform the migration.
6198 feedback_fn("Migrating instance %s" % self.instance.name)
6200 self.feedback_fn = feedback_fn
6202 self.source_node = self.instance.primary_node
6203 self.target_node = self.instance.secondary_nodes[0]
6204 self.all_nodes = [self.source_node, self.target_node]
6206 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6207 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6211 return self._ExecCleanup()
6213 return self._ExecMigration()
6216 def _CreateBlockDev(lu, node, instance, device, force_create,
6218 """Create a tree of block devices on a given node.
6220 If this device type has to be created on secondaries, create it and
6223 If not, just recurse into its children, keeping the same 'force' value.
6225 @param lu: the lu on whose behalf we execute
6226 @param node: the node on which to create the device
6227 @type instance: L{objects.Instance}
6228 @param instance: the instance which owns the device
6229 @type device: L{objects.Disk}
6230 @param device: the device to create
6231 @type force_create: boolean
6232 @param force_create: whether to force creation of this device; this
6233 will be changed to True whenever we find a device which has the
6234 CreateOnSecondary() attribute set
6235 @param info: the extra 'metadata' we should attach to the device
6236 (this will be represented as an LVM tag)
6237 @type force_open: boolean
6238 @type force_open: boolean
6239 @param force_open: this parameter will be passed to the
6240 L{backend.BlockdevCreate} function where it specifies
6241 whether we run on primary or not, and it affects both
6242 the child assembly and the device's own Open() execution
6244 if device.CreateOnSecondary():
6248 for child in device.children:
6249 _CreateBlockDev(lu, node, instance, child, force_create,
6252 if not force_create:
6255 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
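# Illustrative usage sketch (not part of the original source); it mirrors the
# per-node loop in _CreateDisks() further below, where force_create and
# force_open are True only on the primary node:
#
#   for node in instance.all_nodes:
#     f_create = (node == instance.primary_node)
#     _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)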
6258 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6259 """Create a single block device on a given node.
6261 This will not recurse over children of the device, so they must be
6264 @param lu: the lu on whose behalf we execute
6265 @param node: the node on which to create the device
6266 @type instance: L{objects.Instance}
6267 @param instance: the instance which owns the device
6268 @type device: L{objects.Disk}
6269 @param device: the device to create
6270 @param info: the extra 'metadata' we should attach to the device
6271 (this will be represented as an LVM tag)
6272 @type force_open: boolean
6273 @type force_open: boolean
6274 @param force_open: this parameter will be passed to the
6275 L{backend.BlockdevCreate} function where it specifies
6276 whether we run on primary or not, and it affects both
6277 the child assembly and the device's own Open() execution
6279 lu.cfg.SetDiskID(device, node)
6280 result = lu.rpc.call_blockdev_create(node, device, device.size,
6281 instance.name, force_open, info)
6282 result.Raise("Can't create block device %s on"
6283 " node %s for instance %s" % (device, node, instance.name))
6284 if device.physical_id is None:
6285 device.physical_id = result.payload
6288 def _GenerateUniqueNames(lu, exts):
6289 """Generate suitable LV names.
6291 This will generate logical volume names for the given instance.
6296 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6297 results.append("%s%s" % (new_id, val))
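# Illustrative example (not part of the original source): a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns names of the form ["<uuid0>.disk0", "<uuid1>.disk1"], each element
# prefixed with its own ID from cfg.GenerateUniqueID().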
6301 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6303 """Generate a drbd8 device complete with its children.
6306 port = lu.cfg.AllocatePort()
6307 vgname = lu.cfg.GetVGName()
6308 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6309 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6310 logical_id=(vgname, names[0]))
6311 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6312 logical_id=(vgname, names[1]))
6313 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6314 logical_id=(primary, secondary, port,
6317 children=[dev_data, dev_meta],
6322 def _GenerateDiskTemplate(lu, template_name,
6323 instance_name, primary_node,
6324 secondary_nodes, disk_info,
6325 file_storage_dir, file_driver,
6327 """Generate the entire disk layout for a given template type.
6330 #TODO: compute space requirements
6332 vgname = lu.cfg.GetVGName()
6333 disk_count = len(disk_info)
6335 if template_name == constants.DT_DISKLESS:
6337 elif template_name == constants.DT_PLAIN:
6338 if len(secondary_nodes) != 0:
6339 raise errors.ProgrammerError("Wrong template configuration")
6341 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6342 for i in range(disk_count)])
6343 for idx, disk in enumerate(disk_info):
6344 disk_index = idx + base_index
6345 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6346 logical_id=(vgname, names[idx]),
6347 iv_name="disk/%d" % disk_index,
6349 disks.append(disk_dev)
6350 elif template_name == constants.DT_DRBD8:
6351 if len(secondary_nodes) != 1:
6352 raise errors.ProgrammerError("Wrong template configuration")
6353 remote_node = secondary_nodes[0]
6354 minors = lu.cfg.AllocateDRBDMinor(
6355 [primary_node, remote_node] * len(disk_info), instance_name)
6358 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6359 for i in range(disk_count)]):
6360 names.append(lv_prefix + "_data")
6361 names.append(lv_prefix + "_meta")
6362 for idx, disk in enumerate(disk_info):
6363 disk_index = idx + base_index
6364 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6365 disk["size"], names[idx*2:idx*2+2],
6366 "disk/%d" % disk_index,
6367 minors[idx*2], minors[idx*2+1])
6368 disk_dev.mode = disk["mode"]
6369 disks.append(disk_dev)
6370 elif template_name == constants.DT_FILE:
6371 if len(secondary_nodes) != 0:
6372 raise errors.ProgrammerError("Wrong template configuration")
6374 _RequireFileStorage()
6376 for idx, disk in enumerate(disk_info):
6377 disk_index = idx + base_index
6378 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6379 iv_name="disk/%d" % disk_index,
6380 logical_id=(file_driver,
6381 "%s/disk%d" % (file_storage_dir,
6384 disks.append(disk_dev)
6386 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
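# Illustrative example (not part of the original source): for
# template_name=constants.DT_PLAIN, disk_info=[{"size": 512,
# "mode": constants.DISK_RDWR}] and base_index=0, the result is a single
# LD_LV Disk of 512 MB with logical_id=(<vgname>, "<uuid>.disk0") and
# iv_name "disk/0"; under DT_DRBD8 the same entry instead becomes a DRBD8
# device backed by a data LV and a 128 MB metadata LV on both nodes.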
6390 def _GetInstanceInfoText(instance):
6391 """Compute the text that should be added to the disk's metadata.
6394 return "originstname+%s" % instance.name
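# For example (name purely illustrative), an instance called
# "web1.example.com" yields "originstname+web1.example.com", which
# _CreateDisks() below attaches to the new volumes as an LVM tag.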
6397 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6398 """Create all disks for an instance.
6400 This abstracts away some work from AddInstance.
6402 @type lu: L{LogicalUnit}
6403 @param lu: the logical unit on whose behalf we execute
6404 @type instance: L{objects.Instance}
6405 @param instance: the instance whose disks we should create
6407 @param to_skip: list of indices to skip
6408 @type target_node: string
6409 @param target_node: if passed, overrides the target node for creation
6411 @return: the success of the creation
6414 info = _GetInstanceInfoText(instance)
6415 if target_node is None:
6416 pnode = instance.primary_node
6417 all_nodes = instance.all_nodes
6422 if instance.disk_template == constants.DT_FILE:
6423 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6424 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6426 result.Raise("Failed to create directory '%s' on"
6427 " node %s" % (file_storage_dir, pnode))
6429 # Note: this needs to be kept in sync with adding of disks in
6430 # LUSetInstanceParams
6431 for idx, device in enumerate(instance.disks):
6432 if to_skip and idx in to_skip:
6434 logging.info("Creating volume %s for instance %s",
6435 device.iv_name, instance.name)
6437 for node in all_nodes:
6438 f_create = node == pnode
6439 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6442 def _RemoveDisks(lu, instance, target_node=None):
6443 """Remove all disks for an instance.
6445 This abstracts away some work from `AddInstance()` and
6446 `RemoveInstance()`. Note that in case some of the devices couldn't
6447 be removed, the removal will continue with the other ones (compare
6448 with `_CreateDisks()`).
6450 @type lu: L{LogicalUnit}
6451 @param lu: the logical unit on whose behalf we execute
6452 @type instance: L{objects.Instance}
6453 @param instance: the instance whose disks we should remove
6454 @type target_node: string
6455 @param target_node: used to override the node on which to remove the disks
6457 @return: the success of the removal
6460 logging.info("Removing block devices for instance %s", instance.name)
6463 for device in instance.disks:
6465 edata = [(target_node, device)]
6467 edata = device.ComputeNodeTree(instance.primary_node)
6468 for node, disk in edata:
6469 lu.cfg.SetDiskID(disk, node)
6470 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6472 lu.LogWarning("Could not remove block device %s on node %s,"
6473 " continuing anyway: %s", device.iv_name, node, msg)
6476 if instance.disk_template == constants.DT_FILE:
6477 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6481 tgt = instance.primary_node
6482 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6484 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6485 file_storage_dir, instance.primary_node, result.fail_msg)
6491 def _ComputeDiskSize(disk_template, disks):
6492 """Compute disk size requirements in the volume group
6495 # Required free disk space as a function of disk and swap space
6497 constants.DT_DISKLESS: None,
6498 constants.DT_PLAIN: sum(d["size"] for d in disks),
6499 # 128 MB are added for drbd metadata for each disk
6500 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6501 constants.DT_FILE: None,
6504 if disk_template not in req_size_dict:
6505 raise errors.ProgrammerError("Disk template '%s' size requirement"
6506 " is unknown" % disk_template)
6508 return req_size_dict[disk_template]
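# Worked example (illustrative, not part of the original source): for two
# disks of 1024 MB and 2048 MB, DT_PLAIN needs 1024 + 2048 = 3072 MB in the
# volume group, DT_DRBD8 adds 128 MB of DRBD metadata per disk, i.e.
# (1024 + 128) + (2048 + 128) = 3328 MB, and DT_DISKLESS/DT_FILE have no
# volume group requirement (None).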
6511 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6512 """Hypervisor parameter validation.
6514 This function abstracts the hypervisor parameter validation to be
6515 used in both instance create and instance modify.
6517 @type lu: L{LogicalUnit}
6518 @param lu: the logical unit for which we check
6519 @type nodenames: list
6520 @param nodenames: the list of nodes on which we should check
6521 @type hvname: string
6522 @param hvname: the name of the hypervisor we should use
6523 @type hvparams: dict
6524 @param hvparams: the parameters which we need to check
6525 @raise errors.OpPrereqError: if the parameters are not valid
6528 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6531 for node in nodenames:
6535 info.Raise("Hypervisor parameter validation failed on node %s" % node)
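# Typical usage (illustrative) as done later in LUCreateInstance.CheckPrereq:
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)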
6538 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6539 """OS parameters validation.
6541 @type lu: L{LogicalUnit}
6542 @param lu: the logical unit for which we check
6543 @type required: boolean
6544 @param required: whether the validation should fail if the OS is not
6546 @type nodenames: list
6547 @param nodenames: the list of nodes on which we should check
6548 @type osname: string
6549 @param osname: the name of the OS we should use
6550 @type osparams: dict
6551 @param osparams: the parameters which we need to check
6552 @raise errors.OpPrereqError: if the parameters are not valid
6555 result = lu.rpc.call_os_validate(required, nodenames, osname,
6556 [constants.OS_VALIDATE_PARAMETERS],
6558 for node, nres in result.items():
6559 # we don't check for offline cases since this should be run only
6560 # against the master node and/or an instance's nodes
6561 nres.Raise("OS Parameters validation failed on node %s" % node)
6562 if not nres.payload:
6563 lu.LogInfo("OS %s not found on node %s, validation skipped",
6567 class LUCreateInstance(LogicalUnit):
6568 """Create an instance.
6571 HPATH = "instance-add"
6572 HTYPE = constants.HTYPE_INSTANCE
6575 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6576 ("start", True, _TBool),
6577 ("wait_for_sync", True, _TBool),
6578 ("ip_check", True, _TBool),
6579 ("name_check", True, _TBool),
6580 ("disks", _NoDefault, _TListOf(_TDict)),
6581 ("nics", _NoDefault, _TListOf(_TDict)),
6582 ("hvparams", _EmptyDict, _TDict),
6583 ("beparams", _EmptyDict, _TDict),
6584 ("osparams", _EmptyDict, _TDict),
6585 ("no_install", None, _TMaybeBool),
6586 ("os_type", None, _TMaybeString),
6587 ("force_variant", False, _TBool),
6588 ("source_handshake", None, _TOr(_TList, _TNone)),
6589 ("source_x509_ca", None, _TMaybeString),
6590 ("source_instance_name", None, _TMaybeString),
6591 ("src_node", None, _TMaybeString),
6592 ("src_path", None, _TMaybeString),
6593 ("pnode", None, _TMaybeString),
6594 ("snode", None, _TMaybeString),
6595 ("iallocator", None, _TMaybeString),
6596 ("hypervisor", None, _TMaybeString),
6597 ("disk_template", _NoDefault, _CheckDiskTemplate),
6598 ("identify_defaults", False, _TBool),
6599 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6600 ("file_storage_dir", None, _TMaybeString),
6604 def CheckArguments(self):
6608 # do not require name_check to ease forward/backward compatibility
6610 if self.op.no_install and self.op.start:
6611 self.LogInfo("No-installation mode selected, disabling startup")
6612 self.op.start = False
6613 # validate/normalize the instance name
6614 self.op.instance_name = \
6615 netutils.HostInfo.NormalizeName(self.op.instance_name)
6617 if self.op.ip_check and not self.op.name_check:
6618 # TODO: make the ip check more flexible and not depend on the name check
6619 raise errors.OpPrereqError("Cannot do ip check without a name check",
6622 # check nics' parameter names
6623 for nic in self.op.nics:
6624 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6626 # check disks. parameter names and consistent adopt/no-adopt strategy
6627 has_adopt = has_no_adopt = False
6628 for disk in self.op.disks:
6629 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6634 if has_adopt and has_no_adopt:
6635 raise errors.OpPrereqError("Either all disks are adopted or none is",
6638 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6639 raise errors.OpPrereqError("Disk adoption is not supported for the"
6640 " '%s' disk template" %
6641 self.op.disk_template,
6643 if self.op.iallocator is not None:
6644 raise errors.OpPrereqError("Disk adoption not allowed with an"
6645 " iallocator script", errors.ECODE_INVAL)
6646 if self.op.mode == constants.INSTANCE_IMPORT:
6647 raise errors.OpPrereqError("Disk adoption not allowed for"
6648 " instance import", errors.ECODE_INVAL)
6650 self.adopt_disks = has_adopt
6652 # instance name verification
6653 if self.op.name_check:
6654 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6655 self.op.instance_name = self.hostname1.name
6656 # used in CheckPrereq for ip ping check
6657 self.check_ip = self.hostname1.ip
6658 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6659 raise errors.OpPrereqError("Remote imports require names to be checked",
6662 self.check_ip = None
6664 # file storage checks
6665 if (self.op.file_driver and
6666 not self.op.file_driver in constants.FILE_DRIVER):
6667 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6668 self.op.file_driver, errors.ECODE_INVAL)
6670 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6671 raise errors.OpPrereqError("File storage directory path not absolute",
6674 ### Node/iallocator related checks
6675 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6677 if self.op.pnode is not None:
6678 if self.op.disk_template in constants.DTS_NET_MIRROR:
6679 if self.op.snode is None:
6680 raise errors.OpPrereqError("The networked disk templates need"
6681 " a mirror node", errors.ECODE_INVAL)
6683 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6685 self.op.snode = None
6687 self._cds = _GetClusterDomainSecret()
6689 if self.op.mode == constants.INSTANCE_IMPORT:
6690 # On import force_variant must be True, because if we forced it at
6691 # initial install, our only chance when importing it back is that it
6693 self.op.force_variant = True
6695 if self.op.no_install:
6696 self.LogInfo("No-installation mode has no effect during import")
6698 elif self.op.mode == constants.INSTANCE_CREATE:
6699 if self.op.os_type is None:
6700 raise errors.OpPrereqError("No guest OS specified",
6702 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
6703 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6704 " installation" % self.op.os_type,
6706 if self.op.disk_template is None:
6707 raise errors.OpPrereqError("No disk template specified",
6710 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6711 # Check handshake to ensure both clusters have the same domain secret
6712 src_handshake = self.op.source_handshake
6713 if not src_handshake:
6714 raise errors.OpPrereqError("Missing source handshake",
6717 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6720 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6723 # Load and check source CA
6724 self.source_x509_ca_pem = self.op.source_x509_ca
6725 if not self.source_x509_ca_pem:
6726 raise errors.OpPrereqError("Missing source X509 CA",
6730 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6732 except OpenSSL.crypto.Error, err:
6733 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6734 (err, ), errors.ECODE_INVAL)
6736 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6737 if errcode is not None:
6738 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6741 self.source_x509_ca = cert
6743 src_instance_name = self.op.source_instance_name
6744 if not src_instance_name:
6745 raise errors.OpPrereqError("Missing source instance name",
6748 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6749 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6752 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6753 self.op.mode, errors.ECODE_INVAL)
6755 def ExpandNames(self):
6756 """ExpandNames for CreateInstance.
6758 Figure out the right locks for instance creation.
6761 self.needed_locks = {}
6763 instance_name = self.op.instance_name
6764 # this is just a preventive check, but someone might still add this
6765 # instance in the meantime, and creation will fail at lock-add time
6766 if instance_name in self.cfg.GetInstanceList():
6767 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6768 instance_name, errors.ECODE_EXISTS)
6770 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6772 if self.op.iallocator:
6773 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6775 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6776 nodelist = [self.op.pnode]
6777 if self.op.snode is not None:
6778 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6779 nodelist.append(self.op.snode)
6780 self.needed_locks[locking.LEVEL_NODE] = nodelist
6782 # in case of import lock the source node too
6783 if self.op.mode == constants.INSTANCE_IMPORT:
6784 src_node = self.op.src_node
6785 src_path = self.op.src_path
6787 if src_path is None:
6788 self.op.src_path = src_path = self.op.instance_name
6790 if src_node is None:
6791 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6792 self.op.src_node = None
6793 if os.path.isabs(src_path):
6794 raise errors.OpPrereqError("Importing an instance from an absolute"
6795 " path requires a source node option.",
6798 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6799 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6800 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6801 if not os.path.isabs(src_path):
6802 self.op.src_path = src_path = \
6803 utils.PathJoin(constants.EXPORT_DIR, src_path)
6805 def _RunAllocator(self):
6806 """Run the allocator based on input opcode.
6809 nics = [n.ToDict() for n in self.nics]
6810 ial = IAllocator(self.cfg, self.rpc,
6811 mode=constants.IALLOCATOR_MODE_ALLOC,
6812 name=self.op.instance_name,
6813 disk_template=self.op.disk_template,
6816 vcpus=self.be_full[constants.BE_VCPUS],
6817 mem_size=self.be_full[constants.BE_MEMORY],
6820 hypervisor=self.op.hypervisor,
6823 ial.Run(self.op.iallocator)
6826 raise errors.OpPrereqError("Can't compute nodes using"
6827 " iallocator '%s': %s" %
6828 (self.op.iallocator, ial.info),
6830 if len(ial.result) != ial.required_nodes:
6831 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6832 " of nodes (%s), required %s" %
6833 (self.op.iallocator, len(ial.result),
6834 ial.required_nodes), errors.ECODE_FAULT)
6835 self.op.pnode = ial.result[0]
6836 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6837 self.op.instance_name, self.op.iallocator,
6838 utils.CommaJoin(ial.result))
6839 if ial.required_nodes == 2:
6840 self.op.snode = ial.result[1]
6842 def BuildHooksEnv(self):
6845 This runs on master, primary and secondary nodes of the instance.
6849 "ADD_MODE": self.op.mode,
6851 if self.op.mode == constants.INSTANCE_IMPORT:
6852 env["SRC_NODE"] = self.op.src_node
6853 env["SRC_PATH"] = self.op.src_path
6854 env["SRC_IMAGES"] = self.src_images
6856 env.update(_BuildInstanceHookEnv(
6857 name=self.op.instance_name,
6858 primary_node=self.op.pnode,
6859 secondary_nodes=self.secondaries,
6860 status=self.op.start,
6861 os_type=self.op.os_type,
6862 memory=self.be_full[constants.BE_MEMORY],
6863 vcpus=self.be_full[constants.BE_VCPUS],
6864 nics=_NICListToTuple(self, self.nics),
6865 disk_template=self.op.disk_template,
6866 disks=[(d["size"], d["mode"]) for d in self.disks],
6869 hypervisor_name=self.op.hypervisor,
6872 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6876 def _ReadExportInfo(self):
6877 """Reads the export information from disk.
6879 It will override the opcode source node and path with the actual
6880 information, if these two were not specified before.
6882 @return: the export information
6885 assert self.op.mode == constants.INSTANCE_IMPORT
6887 src_node = self.op.src_node
6888 src_path = self.op.src_path
6890 if src_node is None:
6891 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6892 exp_list = self.rpc.call_export_list(locked_nodes)
6894 for node in exp_list:
6895 if exp_list[node].fail_msg:
6897 if src_path in exp_list[node].payload:
6899 self.op.src_node = src_node = node
6900 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6904 raise errors.OpPrereqError("No export found for relative path %s" %
6905 src_path, errors.ECODE_INVAL)
6907 _CheckNodeOnline(self, src_node)
6908 result = self.rpc.call_export_info(src_node, src_path)
6909 result.Raise("No export or invalid export found in dir %s" % src_path)
6911 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6912 if not export_info.has_section(constants.INISECT_EXP):
6913 raise errors.ProgrammerError("Corrupted export config",
6914 errors.ECODE_ENVIRON)
6916 ei_version = export_info.get(constants.INISECT_EXP, "version")
6917 if (int(ei_version) != constants.EXPORT_VERSION):
6918 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6919 (ei_version, constants.EXPORT_VERSION),
6920 errors.ECODE_ENVIRON)
6923 def _ReadExportParams(self, einfo):
6924 """Use export parameters as defaults.
6926 In case the opcode doesn't specify (i.e. override) some instance
6927 parameters, try to use them from the export information, if
6931 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6933 if self.op.disk_template is None:
6934 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6935 self.op.disk_template = einfo.get(constants.INISECT_INS,
6938 raise errors.OpPrereqError("No disk template specified and the export"
6939 " is missing the disk_template information",
6942 if not self.op.disks:
6943 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6945 # TODO: import the disk iv_name too
6946 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6947 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6948 disks.append({"size": disk_sz})
6949 self.op.disks = disks
6951 raise errors.OpPrereqError("No disk info specified and the export"
6952 " is missing the disk information",
6955 if (not self.op.nics and
6956 einfo.has_option(constants.INISECT_INS, "nic_count")):
6958 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6960 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6961 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6966 if (self.op.hypervisor is None and
6967 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6968 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6969 if einfo.has_section(constants.INISECT_HYP):
6970 # use the export parameters but do not override the ones
6971 # specified by the user
6972 for name, value in einfo.items(constants.INISECT_HYP):
6973 if name not in self.op.hvparams:
6974 self.op.hvparams[name] = value
6976 if einfo.has_section(constants.INISECT_BEP):
6977 # use the parameters, without overriding
6978 for name, value in einfo.items(constants.INISECT_BEP):
6979 if name not in self.op.beparams:
6980 self.op.beparams[name] = value
6982 # try to read the parameters old style, from the main section
6983 for name in constants.BES_PARAMETERS:
6984 if (name not in self.op.beparams and
6985 einfo.has_option(constants.INISECT_INS, name)):
6986 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6988 if einfo.has_section(constants.INISECT_OSP):
6989 # use the parameters, without overriding
6990 for name, value in einfo.items(constants.INISECT_OSP):
6991 if name not in self.op.osparams:
6992 self.op.osparams[name] = value
6994 def _RevertToDefaults(self, cluster):
6995 """Revert the instance parameters to the default values.
6999 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7000 for name in self.op.hvparams.keys():
7001 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7002 del self.op.hvparams[name]
7004 be_defs = cluster.SimpleFillBE({})
7005 for name in self.op.beparams.keys():
7006 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7007 del self.op.beparams[name]
7009 nic_defs = cluster.SimpleFillNIC({})
7010 for nic in self.op.nics:
7011 for name in constants.NICS_PARAMETERS:
7012 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7015 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7016 for name in self.op.osparams.keys():
7017 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7018 del self.op.osparams[name]
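# Illustrative effect (editorial note, not part of the original source): if
# the opcode passes a parameter whose value equals the cluster-wide default,
# the entry is dropped here so the instance does not carry a redundant
# override and keeps following the cluster default if it later changes.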
7020 def CheckPrereq(self):
7021 """Check prerequisites.
7024 if self.op.mode == constants.INSTANCE_IMPORT:
7025 export_info = self._ReadExportInfo()
7026 self._ReadExportParams(export_info)
7028 _CheckDiskTemplate(self.op.disk_template)
7030 if (not self.cfg.GetVGName() and
7031 self.op.disk_template not in constants.DTS_NOT_LVM):
7032 raise errors.OpPrereqError("Cluster does not support lvm-based"
7033 " instances", errors.ECODE_STATE)
7035 if self.op.hypervisor is None:
7036 self.op.hypervisor = self.cfg.GetHypervisorType()
7038 cluster = self.cfg.GetClusterInfo()
7039 enabled_hvs = cluster.enabled_hypervisors
7040 if self.op.hypervisor not in enabled_hvs:
7041 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7042 " cluster (%s)" % (self.op.hypervisor,
7043 ",".join(enabled_hvs)),
7046 # check hypervisor parameter syntax (locally)
7047 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7048 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7050 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7051 hv_type.CheckParameterSyntax(filled_hvp)
7052 self.hv_full = filled_hvp
7053 # check that we don't specify global parameters on an instance
7054 _CheckGlobalHvParams(self.op.hvparams)
7056 # fill and remember the beparams dict
7057 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7058 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7060 # build os parameters
7061 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7063 # now that hvp/bep are in final format, let's reset to defaults,
7065 if self.op.identify_defaults:
7066 self._RevertToDefaults(cluster)
7070 for idx, nic in enumerate(self.op.nics):
7071 nic_mode_req = nic.get("mode", None)
7072 nic_mode = nic_mode_req
7073 if nic_mode is None:
7074 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7076 # in routed mode, for the first nic, the default ip is 'auto'
7077 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7078 default_ip_mode = constants.VALUE_AUTO
7080 default_ip_mode = constants.VALUE_NONE
7082 # ip validity checks
7083 ip = nic.get("ip", default_ip_mode)
7084 if ip is None or ip.lower() == constants.VALUE_NONE:
7086 elif ip.lower() == constants.VALUE_AUTO:
7087 if not self.op.name_check:
7088 raise errors.OpPrereqError("IP address set to auto but name checks"
7089 " have been skipped. Aborting.",
7091 nic_ip = self.hostname1.ip
7093 if not netutils.IsValidIP4(ip):
7094 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7095 " like a valid IP" % ip,
7099 # TODO: check the ip address for uniqueness
7100 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7101 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7104 # MAC address verification
7105 mac = nic.get("mac", constants.VALUE_AUTO)
7106 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7107 mac = utils.NormalizeAndValidateMac(mac)
7110 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7111 except errors.ReservationError:
7112 raise errors.OpPrereqError("MAC address %s already in use"
7113 " in cluster" % mac,
7114 errors.ECODE_NOTUNIQUE)
7116 # bridge verification
7117 bridge = nic.get("bridge", None)
7118 link = nic.get("link", None)
7120 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7121 " at the same time", errors.ECODE_INVAL)
7122 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7123 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7130 nicparams[constants.NIC_MODE] = nic_mode_req
7132 nicparams[constants.NIC_LINK] = link
7134 check_params = cluster.SimpleFillNIC(nicparams)
7135 objects.NIC.CheckParameterSyntax(check_params)
7136 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7138 # disk checks/pre-build
7140 for disk in self.op.disks:
7141 mode = disk.get("mode", constants.DISK_RDWR)
7142 if mode not in constants.DISK_ACCESS_SET:
7143 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7144 mode, errors.ECODE_INVAL)
7145 size = disk.get("size", None)
7147 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7150 except (TypeError, ValueError):
7151 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7153 new_disk = {"size": size, "mode": mode}
7155 new_disk["adopt"] = disk["adopt"]
7156 self.disks.append(new_disk)
7158 if self.op.mode == constants.INSTANCE_IMPORT:
7160 # Check that the new instance doesn't have fewer disks than the export
7161 instance_disks = len(self.disks)
7162 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7163 if instance_disks < export_disks:
7164 raise errors.OpPrereqError("Not enough disks to import."
7165 " (instance: %d, export: %d)" %
7166 (instance_disks, export_disks),
7170 for idx in range(export_disks):
7171 option = 'disk%d_dump' % idx
7172 if export_info.has_option(constants.INISECT_INS, option):
7173 # FIXME: are the old os-es, disk sizes, etc. useful?
7174 export_name = export_info.get(constants.INISECT_INS, option)
7175 image = utils.PathJoin(self.op.src_path, export_name)
7176 disk_images.append(image)
7178 disk_images.append(False)
7180 self.src_images = disk_images
7182 old_name = export_info.get(constants.INISECT_INS, 'name')
7184 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7185 except (TypeError, ValueError), err:
7186 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7187 " an integer: %s" % str(err),
7189 if self.op.instance_name == old_name:
7190 for idx, nic in enumerate(self.nics):
7191 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7192 nic_mac_ini = 'nic%d_mac' % idx
7193 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7195 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7197 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7198 if self.op.ip_check:
7199 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7200 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7201 (self.check_ip, self.op.instance_name),
7202 errors.ECODE_NOTUNIQUE)
7204 #### mac address generation
7205 # By generating here the mac address both the allocator and the hooks get
7206 # the real final mac address rather than the 'auto' or 'generate' value.
7207 # There is a race condition between the generation and the instance object
7208 # creation, which means that we know the mac is valid now, but we're not
7209 # sure it will be when we actually add the instance. If things go bad
7210 # adding the instance will abort because of a duplicate mac, and the
7211 # creation job will fail.
7212 for nic in self.nics:
7213 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7214 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7218 if self.op.iallocator is not None:
7219 self._RunAllocator()
7221 #### node related checks
7223 # check primary node
7224 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7225 assert self.pnode is not None, \
7226 "Cannot retrieve locked node %s" % self.op.pnode
7228 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7229 pnode.name, errors.ECODE_STATE)
7231 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7232 pnode.name, errors.ECODE_STATE)
7234 self.secondaries = []
7236 # mirror node verification
7237 if self.op.disk_template in constants.DTS_NET_MIRROR:
7238 if self.op.snode == pnode.name:
7239 raise errors.OpPrereqError("The secondary node cannot be the"
7240 " primary node.", errors.ECODE_INVAL)
7241 _CheckNodeOnline(self, self.op.snode)
7242 _CheckNodeNotDrained(self, self.op.snode)
7243 self.secondaries.append(self.op.snode)
7245 nodenames = [pnode.name] + self.secondaries
7247 req_size = _ComputeDiskSize(self.op.disk_template,
7250 # Check lv size requirements, if not adopting
7251 if req_size is not None and not self.adopt_disks:
7252 _CheckNodesFreeDisk(self, nodenames, req_size)
7254 if self.adopt_disks: # instead, we must check the adoption data
7255 all_lvs = set([i["adopt"] for i in self.disks])
7256 if len(all_lvs) != len(self.disks):
7257 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7259 for lv_name in all_lvs:
7261 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7262 except errors.ReservationError:
7263 raise errors.OpPrereqError("LV named %s used by another instance" %
7264 lv_name, errors.ECODE_NOTUNIQUE)
7266 node_lvs = self.rpc.call_lv_list([pnode.name],
7267 self.cfg.GetVGName())[pnode.name]
7268 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7269 node_lvs = node_lvs.payload
7270 delta = all_lvs.difference(node_lvs.keys())
7272 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7273 utils.CommaJoin(delta),
7275 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7277 raise errors.OpPrereqError("Online logical volumes found, cannot"
7278 " adopt: %s" % utils.CommaJoin(online_lvs),
7280 # update the size of disk based on what is found
7281 for dsk in self.disks:
7282 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7284 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7286 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7287 # check OS parameters (remotely)
7288 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7290 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7292 # memory check on primary node
7294 _CheckNodeFreeMemory(self, self.pnode.name,
7295 "creating instance %s" % self.op.instance_name,
7296 self.be_full[constants.BE_MEMORY],
7299 self.dry_run_result = list(nodenames)
7301 def Exec(self, feedback_fn):
7302 """Create and add the instance to the cluster.
7305 instance = self.op.instance_name
7306 pnode_name = self.pnode.name
7308 ht_kind = self.op.hypervisor
7309 if ht_kind in constants.HTS_REQ_PORT:
7310 network_port = self.cfg.AllocatePort()
7314 if constants.ENABLE_FILE_STORAGE:
7315 # this is needed because os.path.join does not accept None arguments
7316 if self.op.file_storage_dir is None:
7317 string_file_storage_dir = ""
7319 string_file_storage_dir = self.op.file_storage_dir
7321 # build the full file storage dir path
7322 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7323 string_file_storage_dir, instance)
7325 file_storage_dir = ""
7327 disks = _GenerateDiskTemplate(self,
7328 self.op.disk_template,
7329 instance, pnode_name,
7333 self.op.file_driver,
7336 iobj = objects.Instance(name=instance, os=self.op.os_type,
7337 primary_node=pnode_name,
7338 nics=self.nics, disks=disks,
7339 disk_template=self.op.disk_template,
7341 network_port=network_port,
7342 beparams=self.op.beparams,
7343 hvparams=self.op.hvparams,
7344 hypervisor=self.op.hypervisor,
7345 osparams=self.op.osparams,
7348 if self.adopt_disks:
7349 # rename LVs to the newly-generated names; we need to construct
7350 # 'fake' LV disks with the old data, plus the new unique_id
7351 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7353 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7354 rename_to.append(t_dsk.logical_id)
7355 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7356 self.cfg.SetDiskID(t_dsk, pnode_name)
7357 result = self.rpc.call_blockdev_rename(pnode_name,
7358 zip(tmp_disks, rename_to))
7359 result.Raise("Failed to rename adopted LVs")
7361 feedback_fn("* creating instance disks...")
7363 _CreateDisks(self, iobj)
7364 except errors.OpExecError:
7365 self.LogWarning("Device creation failed, reverting...")
7367 _RemoveDisks(self, iobj)
7369 self.cfg.ReleaseDRBDMinors(instance)
7372 feedback_fn("adding instance %s to cluster config" % instance)
7374 self.cfg.AddInstance(iobj, self.proc.GetECId())
7376 # Declare that we don't want to remove the instance lock anymore, as we've
7377 # added the instance to the config
7378 del self.remove_locks[locking.LEVEL_INSTANCE]
7379 # Unlock all the nodes
7380 if self.op.mode == constants.INSTANCE_IMPORT:
7381 nodes_keep = [self.op.src_node]
7382 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7383 if node != self.op.src_node]
7384 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7385 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7387 self.context.glm.release(locking.LEVEL_NODE)
7388 del self.acquired_locks[locking.LEVEL_NODE]
7390 if self.op.wait_for_sync:
7391 disk_abort = not _WaitForSync(self, iobj)
7392 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7393 # make sure the disks are not degraded (still sync-ing is ok)
7395 feedback_fn("* checking mirrors status")
7396 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7401 _RemoveDisks(self, iobj)
7402 self.cfg.RemoveInstance(iobj.name)
7403 # Make sure the instance lock gets removed
7404 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7405 raise errors.OpExecError("There are some degraded disks for"
7408 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7409 if self.op.mode == constants.INSTANCE_CREATE:
7410 if not self.op.no_install:
7411 feedback_fn("* running the instance OS create scripts...")
7412 # FIXME: pass debug option from opcode to backend
7413 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7414 self.op.debug_level)
7415 result.Raise("Could not add os for instance %s"
7416 " on node %s" % (instance, pnode_name))
7418 elif self.op.mode == constants.INSTANCE_IMPORT:
7419 feedback_fn("* running the instance OS import scripts...")
7423 for idx, image in enumerate(self.src_images):
7427 # FIXME: pass debug option from opcode to backend
7428 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7429 constants.IEIO_FILE, (image, ),
7430 constants.IEIO_SCRIPT,
7431 (iobj.disks[idx], idx),
7433 transfers.append(dt)
7436 masterd.instance.TransferInstanceData(self, feedback_fn,
7437 self.op.src_node, pnode_name,
7438 self.pnode.secondary_ip,
7440 if not compat.all(import_result):
7441 self.LogWarning("Some disks for instance %s on node %s were not"
7442 " imported successfully" % (instance, pnode_name))
7444 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7445 feedback_fn("* preparing remote import...")
7446 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7447 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7449 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7450 self.source_x509_ca,
7451 self._cds, timeouts)
7452 if not compat.all(disk_results):
7453 # TODO: Should the instance still be started, even if some disks
7454 # failed to import (valid for local imports, too)?
7455 self.LogWarning("Some disks for instance %s on node %s were not"
7456 " imported successfully" % (instance, pnode_name))
7458 # Run rename script on newly imported instance
7459 assert iobj.name == instance
7460 feedback_fn("Running rename script for %s" % instance)
7461 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7462 self.source_instance_name,
7463 self.op.debug_level)
7465 self.LogWarning("Failed to run rename script for %s on node"
7466 " %s: %s" % (instance, pnode_name, result.fail_msg))
7469 # also checked in the prereq part
7470 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7474 iobj.admin_up = True
7475 self.cfg.Update(iobj, feedback_fn)
7476 logging.info("Starting instance %s on node %s", instance, pnode_name)
7477 feedback_fn("* starting instance...")
7478 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7479 result.Raise("Could not start instance")
7481 return list(iobj.all_nodes)
7484 class LUConnectConsole(NoHooksLU):
7485 """Connect to an instance's console.
7487 This is somewhat special in that it returns the command line that
7488 you need to run on the master node in order to connect to the
7497 def ExpandNames(self):
7498 self._ExpandAndLockInstance()
7500 def CheckPrereq(self):
7501 """Check prerequisites.
7503 This checks that the instance is in the cluster.
7506 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7507 assert self.instance is not None, \
7508 "Cannot retrieve locked instance %s" % self.op.instance_name
7509 _CheckNodeOnline(self, self.instance.primary_node)
7511 def Exec(self, feedback_fn):
7512 """Connect to the console of an instance
7515 instance = self.instance
7516 node = instance.primary_node
7518 node_insts = self.rpc.call_instance_list([node],
7519 [instance.hypervisor])[node]
7520 node_insts.Raise("Can't get node information from %s" % node)
7522 if instance.name not in node_insts.payload:
7523 raise errors.OpExecError("Instance %s is not running." % instance.name)
7525 logging.debug("Connecting to console of %s on %s", instance.name, node)
7527 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7528 cluster = self.cfg.GetClusterInfo()
7529 # beparams and hvparams are passed separately, to avoid editing the
7530 # instance and then saving the defaults in the instance itself.
7531 hvparams = cluster.FillHV(instance)
7532 beparams = cluster.FillBE(instance)
7533 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7536 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7539 class LUReplaceDisks(LogicalUnit):
7540 """Replace the disks of an instance.
7543 HPATH = "mirrors-replace"
7544 HTYPE = constants.HTYPE_INSTANCE
7547 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7548 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7549 ("remote_node", None, _TMaybeString),
7550 ("iallocator", None, _TMaybeString),
7551 ("early_release", False, _TBool),
7555 def CheckArguments(self):
7556 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7559 def ExpandNames(self):
7560 self._ExpandAndLockInstance()
7562 if self.op.iallocator is not None:
7563 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7565 elif self.op.remote_node is not None:
7566 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7567 self.op.remote_node = remote_node
7569 # Warning: do not remove the locking of the new secondary here
7570 # unless DRBD8.AddChildren is changed to work in parallel;
7571 # currently it doesn't since parallel invocations of
7572 # FindUnusedMinor will conflict
7573 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7577 self.needed_locks[locking.LEVEL_NODE] = []
7578 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7580 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7581 self.op.iallocator, self.op.remote_node,
7582 self.op.disks, False, self.op.early_release)
7584 self.tasklets = [self.replacer]
7586 def DeclareLocks(self, level):
7587 # If we're not already locking all nodes in the set we have to declare the
7588 # instance's primary/secondary nodes.
7589 if (level == locking.LEVEL_NODE and
7590 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7591 self._LockInstancesNodes()
7593 def BuildHooksEnv(self):
7596 This runs on the master, the primary and all the secondaries.
7599 instance = self.replacer.instance
7601 "MODE": self.op.mode,
7602 "NEW_SECONDARY": self.op.remote_node,
7603 "OLD_SECONDARY": instance.secondary_nodes[0],
7605 env.update(_BuildInstanceHookEnvByObject(self, instance))
7607 self.cfg.GetMasterNode(),
7608 instance.primary_node,
7610 if self.op.remote_node is not None:
7611 nl.append(self.op.remote_node)
7615 class TLReplaceDisks(Tasklet):
7616 """Replaces disks for an instance.
7618 Note: Locking is not within the scope of this class.
7621 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7622 disks, delay_iallocator, early_release):
7623 """Initializes this class.
7626 Tasklet.__init__(self, lu)
7629 self.instance_name = instance_name
7631 self.iallocator_name = iallocator_name
7632 self.remote_node = remote_node
7634 self.delay_iallocator = delay_iallocator
7635 self.early_release = early_release
7638 self.instance = None
7639 self.new_node = None
7640 self.target_node = None
7641 self.other_node = None
7642 self.remote_node_info = None
7643 self.node_secondary_ip = None
7646 def CheckArguments(mode, remote_node, iallocator):
7647 """Helper function for users of this class.
7650 # check for valid parameter combination
7651 if mode == constants.REPLACE_DISK_CHG:
7652 if remote_node is None and iallocator is None:
7653 raise errors.OpPrereqError("When changing the secondary either an"
7654 " iallocator script must be used or the"
7655 " new node given", errors.ECODE_INVAL)
7657 if remote_node is not None and iallocator is not None:
7658 raise errors.OpPrereqError("Give either the iallocator or the new"
7659 " secondary, not both", errors.ECODE_INVAL)
7661 elif remote_node is not None or iallocator is not None:
7662 # Not replacing the secondary
7663 raise errors.OpPrereqError("The iallocator and new node options can"
7664 " only be used when changing the"
7665 " secondary node", errors.ECODE_INVAL)
7668 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7669 """Compute a new secondary node using an IAllocator.
7672 ial = IAllocator(lu.cfg, lu.rpc,
7673 mode=constants.IALLOCATOR_MODE_RELOC,
7675 relocate_from=relocate_from)
7677 ial.Run(iallocator_name)
7680 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7681 " %s" % (iallocator_name, ial.info),
7684 if len(ial.result) != ial.required_nodes:
7685 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7686 " of nodes (%s), required %s" %
7688 len(ial.result), ial.required_nodes),
7691 remote_node_name = ial.result[0]
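# In IALLOCATOR_MODE_RELOC the allocator returns a list holding a single
# node name, e.g. ial.result == ["node3.example.com"] (hypothetical name,
# for illustration only).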
7693 lu.LogInfo("Selected new secondary for instance '%s': %s",
7694 instance_name, remote_node_name)
7696 return remote_node_name
7698 def _FindFaultyDisks(self, node_name):
7699 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7702 def CheckPrereq(self):
7703 """Check prerequisites.
7705 This checks that the instance is in the cluster.
7708 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7709 assert instance is not None, \
7710 "Cannot retrieve locked instance %s" % self.instance_name
7712 if instance.disk_template != constants.DT_DRBD8:
7713 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7714 " instances", errors.ECODE_INVAL)
7716 if len(instance.secondary_nodes) != 1:
7717 raise errors.OpPrereqError("The instance has a strange layout,"
7718 " expected one secondary but found %d" %
7719 len(instance.secondary_nodes),
7722 if not self.delay_iallocator:
7723 self._CheckPrereq2()
7725 def _CheckPrereq2(self):
7726 """Check prerequisites, second part.
7728 This function should always be part of CheckPrereq. It was separated and is
7729 now called from Exec because during node evacuation iallocator was only
7730 called with an unmodified cluster model, not taking planned changes into account.
7730 called with an unmodified cluster model, not taking planned changes into account.
7734 instance = self.instance
7735 secondary_node = instance.secondary_nodes[0]
7737 if self.iallocator_name is None:
7738 remote_node = self.remote_node
7740 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7741 instance.name, instance.secondary_nodes)
7743 if remote_node is not None:
7744 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7745 assert self.remote_node_info is not None, \
7746 "Cannot retrieve locked node %s" % remote_node
7748 self.remote_node_info = None
7750 if remote_node == self.instance.primary_node:
7751 raise errors.OpPrereqError("The specified node is the primary node of"
7752 " the instance.", errors.ECODE_INVAL)
7754 if remote_node == secondary_node:
7755 raise errors.OpPrereqError("The specified node is already the"
7756 " secondary node of the instance.",
7759 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7760 constants.REPLACE_DISK_CHG):
7761 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7764 if self.mode == constants.REPLACE_DISK_AUTO:
7765 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7766 faulty_secondary = self._FindFaultyDisks(secondary_node)
7768 if faulty_primary and faulty_secondary:
7769 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7770 " one node and can not be repaired"
7771 " automatically" % self.instance_name,
7775 self.disks = faulty_primary
7776 self.target_node = instance.primary_node
7777 self.other_node = secondary_node
7778 check_nodes = [self.target_node, self.other_node]
7779 elif faulty_secondary:
7780 self.disks = faulty_secondary
7781 self.target_node = secondary_node
7782 self.other_node = instance.primary_node
7783 check_nodes = [self.target_node, self.other_node]
7789 # Non-automatic modes
7790 if self.mode == constants.REPLACE_DISK_PRI:
7791 self.target_node = instance.primary_node
7792 self.other_node = secondary_node
7793 check_nodes = [self.target_node, self.other_node]
7795 elif self.mode == constants.REPLACE_DISK_SEC:
7796 self.target_node = secondary_node
7797 self.other_node = instance.primary_node
7798 check_nodes = [self.target_node, self.other_node]
7800 elif self.mode == constants.REPLACE_DISK_CHG:
7801 self.new_node = remote_node
7802 self.other_node = instance.primary_node
7803 self.target_node = secondary_node
7804 check_nodes = [self.new_node, self.other_node]
7806 _CheckNodeNotDrained(self.lu, remote_node)
7808 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7809 assert old_node_info is not None
7810 if old_node_info.offline and not self.early_release:
7811 # doesn't make sense to delay the release
7812 self.early_release = True
7813 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7814 " early-release mode", secondary_node)
7817 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7820 # If not specified all disks should be replaced
7822 self.disks = range(len(self.instance.disks))
7824 for node in check_nodes:
7825 _CheckNodeOnline(self.lu, node)
7827 # Check whether disks are valid
7828 for disk_idx in self.disks:
7829 instance.FindDisk(disk_idx)
7831 # Get secondary node IP addresses
7834 for node_name in [self.target_node, self.other_node, self.new_node]:
7835 if node_name is not None:
7836 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7838 self.node_secondary_ip = node_2nd_ip
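# node_secondary_ip maps node names to their secondary (replication) IP
# addresses, e.g. {"node1": "192.0.2.10", "node2": "192.0.2.11"}
# (hypothetical values, for illustration only).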
7840 def Exec(self, feedback_fn):
7841 """Execute disk replacement.
7843 This dispatches the disk replacement to the appropriate handler.
7846 if self.delay_iallocator:
7847 self._CheckPrereq2()
7850 feedback_fn("No disks need replacement")
7853 feedback_fn("Replacing disk(s) %s for %s" %
7854 (utils.CommaJoin(self.disks), self.instance.name))
7856 activate_disks = (not self.instance.admin_up)
7858 # Activate the instance disks if we're replacing them on a down instance
7860 _StartInstanceDisks(self.lu, self.instance, True)
7863 # Should we replace the secondary node?
7864 if self.new_node is not None:
7865 fn = self._ExecDrbd8Secondary
7867 fn = self._ExecDrbd8DiskOnly
7869 return fn(feedback_fn)
7872 # Deactivate the instance disks if we're replacing them on a down instance
7875 _SafeShutdownInstanceDisks(self.lu, self.instance)
7877 def _CheckVolumeGroup(self, nodes):
7878 self.lu.LogInfo("Checking volume groups")
7880 vgname = self.cfg.GetVGName()
7882 # Make sure volume group exists on all involved nodes
7883 results = self.rpc.call_vg_list(nodes)
7885 raise errors.OpExecError("Can't list volume groups on the nodes")
7889 res.Raise("Error checking node %s" % node)
7890 if vgname not in res.payload:
7891 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7894 def _CheckDisksExistence(self, nodes):
7895 # Check disk existence
7896 for idx, dev in enumerate(self.instance.disks):
7897 if idx not in self.disks:
7901 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7902 self.cfg.SetDiskID(dev, node)
7904 result = self.rpc.call_blockdev_find(node, dev)
7906 msg = result.fail_msg
7907 if msg or not result.payload:
7909 msg = "disk not found"
7910 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7913 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7914 for idx, dev in enumerate(self.instance.disks):
7915 if idx not in self.disks:
7918 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7921 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7923 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7924 " replace disks for instance %s" %
7925 (node_name, self.instance.name))
7927 def _CreateNewStorage(self, node_name):
7928 vgname = self.cfg.GetVGName()
7931 for idx, dev in enumerate(self.instance.disks):
7932 if idx not in self.disks:
7935 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7937 self.cfg.SetDiskID(dev, node_name)
7939 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7940 names = _GenerateUniqueNames(self.lu, lv_names)
7942 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7943 logical_id=(vgname, names[0]))
7944 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7945 logical_id=(vgname, names[1]))
7947 new_lvs = [lv_data, lv_meta]
7948 old_lvs = dev.children
7949 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
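# iv_names maps the DRBD device name to a (drbd_dev, old_lvs, new_lvs)
# tuple, roughly {"disk/0": (<drbd disk>, [<old data LV>, <old meta LV>],
# [<new data LV>, <new meta LV>])} (illustrative shape only).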
7951 # we pass force_create=True to force the LVM creation
7952 for new_lv in new_lvs:
7953 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7954 _GetInstanceInfoText(self.instance), False)
7958 def _CheckDevices(self, node_name, iv_names):
7959 for name, (dev, _, _) in iv_names.iteritems():
7960 self.cfg.SetDiskID(dev, node_name)
7962 result = self.rpc.call_blockdev_find(node_name, dev)
7964 msg = result.fail_msg
7965 if msg or not result.payload:
7967 msg = "disk not found"
7968 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7971 if result.payload.is_degraded:
7972 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7974 def _RemoveOldStorage(self, node_name, iv_names):
7975 for name, (_, old_lvs, _) in iv_names.iteritems():
7976 self.lu.LogInfo("Remove logical volumes for %s" % name)
7979 self.cfg.SetDiskID(lv, node_name)
7981 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7983 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7984 hint="remove unused LVs manually")
7986 def _ReleaseNodeLock(self, node_name):
7987 """Releases the lock for a given node."""
7988 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7990 def _ExecDrbd8DiskOnly(self, feedback_fn):
7991 """Replace a disk on the primary or secondary for DRBD 8.
7993 The algorithm for replace is quite complicated:
7995 1. for each disk to be replaced:
7997 1. create new LVs on the target node with unique names
7998 1. detach old LVs from the drbd device
7999 1. rename old LVs to name_replaced.<time_t>
8000 1. rename new LVs to old LVs
8001 1. attach the new LVs (with the old names now) to the drbd device
8003 1. wait for sync across all devices
8005 1. for each modified disk:
8007 1. remove old LVs (which have the name name_replaced.<time_t>)
8009 Failures are not very well handled.
8014 # Step: check device activation
8015 self.lu.LogStep(1, steps_total, "Check device existence")
8016 self._CheckDisksExistence([self.other_node, self.target_node])
8017 self._CheckVolumeGroup([self.target_node, self.other_node])
8019 # Step: check other node consistency
8020 self.lu.LogStep(2, steps_total, "Check peer consistency")
8021 self._CheckDisksConsistency(self.other_node,
8022 self.other_node == self.instance.primary_node,
8025 # Step: create new storage
8026 self.lu.LogStep(3, steps_total, "Allocate new storage")
8027 iv_names = self._CreateNewStorage(self.target_node)
8029 # Step: for each lv, detach+rename*2+attach
8030 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8031 for dev, old_lvs, new_lvs in iv_names.itervalues():
8032 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8034 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8036 result.Raise("Can't detach drbd from local storage on node"
8037 " %s for device %s" % (self.target_node, dev.iv_name))
8039 #cfg.Update(instance)
8041 # ok, we created the new LVs, so now we know we have the needed
8042 # storage; as such, we proceed on the target node to rename
8043 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8044 # using the assumption that logical_id == physical_id (which in
8045 # turn is the unique_id on that node)
8047 # FIXME(iustin): use a better name for the replaced LVs
8048 temp_suffix = int(time.time())
8049 ren_fn = lambda d, suff: (d.physical_id[0],
8050 d.physical_id[1] + "_replaced-%s" % suff)
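# Illustration (hypothetical names): an old LV ("xenvg", ".disk0_data")
# would be renamed to ("xenvg", ".disk0_data_replaced-1318944991"), where
# the suffix is the int(time.time()) value computed above.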
8052 # Build the rename list based on what LVs exist on the node
8053 rename_old_to_new = []
8054 for to_ren in old_lvs:
8055 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8056 if not result.fail_msg and result.payload:
8058 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8060 self.lu.LogInfo("Renaming the old LVs on the target node")
8061 result = self.rpc.call_blockdev_rename(self.target_node,
8063 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8065 # Now we rename the new LVs to the old LVs
8066 self.lu.LogInfo("Renaming the new LVs on the target node")
8067 rename_new_to_old = [(new, old.physical_id)
8068 for old, new in zip(old_lvs, new_lvs)]
8069 result = self.rpc.call_blockdev_rename(self.target_node,
8071 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8073 for old, new in zip(old_lvs, new_lvs):
8074 new.logical_id = old.logical_id
8075 self.cfg.SetDiskID(new, self.target_node)
8077 for disk in old_lvs:
8078 disk.logical_id = ren_fn(disk, temp_suffix)
8079 self.cfg.SetDiskID(disk, self.target_node)
8081 # Now that the new lvs have the old name, we can add them to the device
8082 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8083 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8085 msg = result.fail_msg
8087 for new_lv in new_lvs:
8088 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8091 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8092 hint=("cleanup manually the unused logical"
8094 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8096 dev.children = new_lvs
8098 self.cfg.Update(self.instance, feedback_fn)
8101 if self.early_release:
8102 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8104 self._RemoveOldStorage(self.target_node, iv_names)
8105 # WARNING: we release both node locks here, do not do other RPCs
8106 # than WaitForSync to the primary node
8107 self._ReleaseNodeLock([self.target_node, self.other_node])
8110 # This can fail as the old devices are degraded and _WaitForSync
8111 # does a combined result over all disks, so we don't check its return value
8112 self.lu.LogStep(cstep, steps_total, "Sync devices")
8114 _WaitForSync(self.lu, self.instance)
8116 # Check all devices manually
8117 self._CheckDevices(self.instance.primary_node, iv_names)
8119 # Step: remove old storage
8120 if not self.early_release:
8121 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8123 self._RemoveOldStorage(self.target_node, iv_names)
8125 def _ExecDrbd8Secondary(self, feedback_fn):
8126 """Replace the secondary node for DRBD 8.
8128 The algorithm for replace is quite complicated:
8129 - for all disks of the instance:
8130 - create new LVs on the new node with same names
8131 - shutdown the drbd device on the old secondary
8132 - disconnect the drbd network on the primary
8133 - create the drbd device on the new secondary
8134 - network attach the drbd on the primary, using an artifice:
8135 the drbd code for Attach() will connect to the network if it
8136 finds a device which is connected to the good local disks but not network enabled
8138 - wait for sync across all devices
8139 - remove all disks from the old secondary
8141 Failures are not very well handled.
8146 # Step: check device activation
8147 self.lu.LogStep(1, steps_total, "Check device existence")
8148 self._CheckDisksExistence([self.instance.primary_node])
8149 self._CheckVolumeGroup([self.instance.primary_node])
8151 # Step: check other node consistency
8152 self.lu.LogStep(2, steps_total, "Check peer consistency")
8153 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8155 # Step: create new storage
8156 self.lu.LogStep(3, steps_total, "Allocate new storage")
8157 for idx, dev in enumerate(self.instance.disks):
8158 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8159 (self.new_node, idx))
8160 # we pass force_create=True to force LVM creation
8161 for new_lv in dev.children:
8162 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8163 _GetInstanceInfoText(self.instance), False)
8165 # Step 4: drbd minor and setup changes
8166 # after this, we must manually remove the drbd minors on both the
8167 # error and the success paths
8168 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8169 minors = self.cfg.AllocateDRBDMinor([self.new_node
8170 for dev in self.instance.disks],
8172 logging.debug("Allocated minors %r", minors)
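# minors is a list with one new DRBD minor number per instance disk,
# e.g. [2, 3] for a two-disk instance (illustrative values only).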
8175 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8176 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8177 (self.new_node, idx))
8178 # create new devices on new_node; note that we create two IDs:
8179 # one without port, so the drbd will be activated without
8180 # networking information on the new node at this stage, and one
8181 # with network, for the latter activation in step 4
8182 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8183 if self.instance.primary_node == o_node1:
8186 assert self.instance.primary_node == o_node2, "Three-node instance?"
8189 new_alone_id = (self.instance.primary_node, self.new_node, None,
8190 p_minor, new_minor, o_secret)
8191 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8192 p_minor, new_minor, o_secret)
8194 iv_names[idx] = (dev, dev.children, new_net_id)
8195 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8197 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8198 logical_id=new_alone_id,
8199 children=dev.children,
8202 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8203 _GetInstanceInfoText(self.instance), False)
8204 except errors.GenericError:
8205 self.cfg.ReleaseDRBDMinors(self.instance.name)
8208 # We have new devices, shutdown the drbd on the old secondary
8209 for idx, dev in enumerate(self.instance.disks):
8210 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8211 self.cfg.SetDiskID(dev, self.target_node)
8212 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8214 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8215 "node: %s" % (idx, msg),
8216 hint=("Please cleanup this device manually as"
8217 " soon as possible"))
8219 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8220 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8221 self.node_secondary_ip,
8222 self.instance.disks)\
8223 [self.instance.primary_node]
8225 msg = result.fail_msg
8227 # detaches didn't succeed (unlikely)
8228 self.cfg.ReleaseDRBDMinors(self.instance.name)
8229 raise errors.OpExecError("Can't detach the disks from the network on"
8230 " old node: %s" % (msg,))
8232 # if we managed to detach at least one, we update all the disks of
8233 # the instance to point to the new secondary
8234 self.lu.LogInfo("Updating instance configuration")
8235 for dev, _, new_logical_id in iv_names.itervalues():
8236 dev.logical_id = new_logical_id
8237 self.cfg.SetDiskID(dev, self.instance.primary_node)
8239 self.cfg.Update(self.instance, feedback_fn)
8241 # and now perform the drbd attach
8242 self.lu.LogInfo("Attaching primary drbds to new secondary"
8243 " (standalone => connected)")
8244 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8246 self.node_secondary_ip,
8247 self.instance.disks,
8250 for to_node, to_result in result.items():
8251 msg = to_result.fail_msg
8253 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8255 hint=("please do a gnt-instance info to see the"
8256 " status of disks"))
8258 if self.early_release:
8259 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8261 self._RemoveOldStorage(self.target_node, iv_names)
8262 # WARNING: we release all node locks here, do not do other RPCs
8263 # than WaitForSync to the primary node
8264 self._ReleaseNodeLock([self.instance.primary_node,
8269 # This can fail as the old devices are degraded and _WaitForSync
8270 # does a combined result over all disks, so we don't check its return value
8271 self.lu.LogStep(cstep, steps_total, "Sync devices")
8273 _WaitForSync(self.lu, self.instance)
8275 # Check all devices manually
8276 self._CheckDevices(self.instance.primary_node, iv_names)
8278 # Step: remove old storage
8279 if not self.early_release:
8280 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8281 self._RemoveOldStorage(self.target_node, iv_names)
8284 class LURepairNodeStorage(NoHooksLU):
8285 """Repairs the volume group on a node.
8290 ("storage_type", _NoDefault, _CheckStorageType),
8291 ("name", _NoDefault, _TNonEmptyString),
8292 ("ignore_consistency", False, _TBool),
8296 def CheckArguments(self):
8297 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8299 storage_type = self.op.storage_type
8301 if (constants.SO_FIX_CONSISTENCY not in
8302 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8303 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8304 " repaired" % storage_type,
8307 def ExpandNames(self):
8308 self.needed_locks = {
8309 locking.LEVEL_NODE: [self.op.node_name],
8312 def _CheckFaultyDisks(self, instance, node_name):
8313 """Ensure faulty disks abort the opcode or at least warn."""
8315 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8317 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8318 " node '%s'" % (instance.name, node_name),
8320 except errors.OpPrereqError, err:
8321 if self.op.ignore_consistency:
8322 self.proc.LogWarning(str(err.args[0]))
8326 def CheckPrereq(self):
8327 """Check prerequisites.
8330 # Check whether any instance on this node has faulty disks
8331 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8332 if not inst.admin_up:
8334 check_nodes = set(inst.all_nodes)
8335 check_nodes.discard(self.op.node_name)
8336 for inst_node_name in check_nodes:
8337 self._CheckFaultyDisks(inst, inst_node_name)
8339 def Exec(self, feedback_fn):
8340 feedback_fn("Repairing storage unit '%s' on %s ..." %
8341 (self.op.name, self.op.node_name))
8343 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8344 result = self.rpc.call_storage_execute(self.op.node_name,
8345 self.op.storage_type, st_args,
8347 constants.SO_FIX_CONSISTENCY)
8348 result.Raise("Failed to repair storage unit '%s' on %s" %
8349 (self.op.name, self.op.node_name))
8352 class LUNodeEvacuationStrategy(NoHooksLU):
8353 """Computes the node evacuation strategy.
8357 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8358 ("remote_node", None, _TMaybeString),
8359 ("iallocator", None, _TMaybeString),
8363 def CheckArguments(self):
8364 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8366 def ExpandNames(self):
8367 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8368 self.needed_locks = locks = {}
8369 if self.op.remote_node is None:
8370 locks[locking.LEVEL_NODE] = locking.ALL_SET
8372 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8373 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8375 def Exec(self, feedback_fn):
8376 if self.op.remote_node is not None:
8378 for node in self.op.nodes:
8379 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8382 if i.primary_node == self.op.remote_node:
8383 raise errors.OpPrereqError("Node %s is the primary node of"
8384 " instance %s, cannot use it as"
8386 (self.op.remote_node, i.name),
8388 result.append([i.name, self.op.remote_node])
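# Each appended entry is an [instance_name, new_secondary_node] pair,
# e.g. ["instance1.example.com", "node2.example.com"] (hypothetical names,
# for illustration only).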
8390 ial = IAllocator(self.cfg, self.rpc,
8391 mode=constants.IALLOCATOR_MODE_MEVAC,
8392 evac_nodes=self.op.nodes)
8393 ial.Run(self.op.iallocator, validate=True)
8395 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8401 class LUGrowDisk(LogicalUnit):
8402 """Grow a disk of an instance.
8406 HTYPE = constants.HTYPE_INSTANCE
8409 ("disk", _NoDefault, _TInt),
8410 ("amount", _NoDefault, _TInt),
8411 ("wait_for_sync", True, _TBool),
8415 def ExpandNames(self):
8416 self._ExpandAndLockInstance()
8417 self.needed_locks[locking.LEVEL_NODE] = []
8418 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8420 def DeclareLocks(self, level):
8421 if level == locking.LEVEL_NODE:
8422 self._LockInstancesNodes()
8424 def BuildHooksEnv(self):
8427 This runs on the master, the primary and all the secondaries.
8431 "DISK": self.op.disk,
8432 "AMOUNT": self.op.amount,
8434 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8435 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8438 def CheckPrereq(self):
8439 """Check prerequisites.
8441 This checks that the instance is in the cluster.
8444 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8445 assert instance is not None, \
8446 "Cannot retrieve locked instance %s" % self.op.instance_name
8447 nodenames = list(instance.all_nodes)
8448 for node in nodenames:
8449 _CheckNodeOnline(self, node)
8451 self.instance = instance
8453 if instance.disk_template not in constants.DTS_GROWABLE:
8454 raise errors.OpPrereqError("Instance's disk layout does not support"
8455 " growing.", errors.ECODE_INVAL)
8457 self.disk = instance.FindDisk(self.op.disk)
8459 if instance.disk_template != constants.DT_FILE:
8460 # TODO: check the free disk space for file, when that feature will be
8462 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8464 def Exec(self, feedback_fn):
8465 """Execute disk grow.
8468 instance = self.instance
8471 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8473 raise errors.OpExecError("Cannot activate block device to grow")
8475 for node in instance.all_nodes:
8476 self.cfg.SetDiskID(disk, node)
8477 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8478 result.Raise("Grow request failed to node %s" % node)
8480 # TODO: Rewrite code to work properly
8481 # DRBD goes into sync mode for a short amount of time after executing the
8482 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8483 # calling "resize" in sync mode fails. Sleeping for a short amount of
8484 # time is a work-around.
8487 disk.RecordGrow(self.op.amount)
8488 self.cfg.Update(instance, feedback_fn)
8489 if self.op.wait_for_sync:
8490 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8492 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8493 " status.\nPlease check the instance.")
8494 if not instance.admin_up:
8495 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8496 elif not instance.admin_up:
8497 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8498 " not supposed to be running because no wait for"
8499 " sync mode was requested.")
8502 class LUQueryInstanceData(NoHooksLU):
8503 """Query runtime instance data.
8507 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8508 ("static", False, _TBool),
8512 def ExpandNames(self):
8513 self.needed_locks = {}
8514 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8516 if self.op.instances:
8517 self.wanted_names = []
8518 for name in self.op.instances:
8519 full_name = _ExpandInstanceName(self.cfg, name)
8520 self.wanted_names.append(full_name)
8521 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8523 self.wanted_names = None
8524 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8526 self.needed_locks[locking.LEVEL_NODE] = []
8527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8529 def DeclareLocks(self, level):
8530 if level == locking.LEVEL_NODE:
8531 self._LockInstancesNodes()
8533 def CheckPrereq(self):
8534 """Check prerequisites.
8536 This only checks the optional instance list against the existing names.
8539 if self.wanted_names is None:
8540 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8542 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8543 in self.wanted_names]
8545 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8546 """Returns the status of a block device
8549 if self.op.static or not node:
8552 self.cfg.SetDiskID(dev, node)
8554 result = self.rpc.call_blockdev_find(node, dev)
8558 result.Raise("Can't compute disk status for %s" % instance_name)
8560 status = result.payload
8564 return (status.dev_path, status.major, status.minor,
8565 status.sync_percent, status.estimated_time,
8566 status.is_degraded, status.ldisk_status)
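# The returned tuple is (dev_path, major, minor, sync_percent,
# estimated_time, is_degraded, ldisk_status), e.g.
# ("/dev/drbd0", 147, 0, 90.5, 120, False, None) -- hypothetical values,
# for illustration only.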
8568 def _ComputeDiskStatus(self, instance, snode, dev):
8569 """Compute block device status.
8572 if dev.dev_type in constants.LDS_DRBD:
8573 # we change the snode then (otherwise we use the one passed in)
8574 if dev.logical_id[0] == instance.primary_node:
8575 snode = dev.logical_id[1]
8577 snode = dev.logical_id[0]
8579 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8581 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8584 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8585 for child in dev.children]
8590 "iv_name": dev.iv_name,
8591 "dev_type": dev.dev_type,
8592 "logical_id": dev.logical_id,
8593 "physical_id": dev.physical_id,
8594 "pstatus": dev_pstatus,
8595 "sstatus": dev_sstatus,
8596 "children": dev_children,
8603 def Exec(self, feedback_fn):
8604 """Gather and return data"""
8607 cluster = self.cfg.GetClusterInfo()
8609 for instance in self.wanted_instances:
8610 if not self.op.static:
8611 remote_info = self.rpc.call_instance_info(instance.primary_node,
8613 instance.hypervisor)
8614 remote_info.Raise("Error checking node %s" % instance.primary_node)
8615 remote_info = remote_info.payload
8616 if remote_info and "state" in remote_info:
8619 remote_state = "down"
8622 if instance.admin_up:
8625 config_state = "down"
8627 disks = [self._ComputeDiskStatus(instance, None, device)
8628 for device in instance.disks]
8631 "name": instance.name,
8632 "config_state": config_state,
8633 "run_state": remote_state,
8634 "pnode": instance.primary_node,
8635 "snodes": instance.secondary_nodes,
8637 # this happens to be the same format used for hooks
8638 "nics": _NICListToTuple(self, instance.nics),
8639 "disk_template": instance.disk_template,
8641 "hypervisor": instance.hypervisor,
8642 "network_port": instance.network_port,
8643 "hv_instance": instance.hvparams,
8644 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8645 "be_instance": instance.beparams,
8646 "be_actual": cluster.FillBE(instance),
8647 "os_instance": instance.osparams,
8648 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8649 "serial_no": instance.serial_no,
8650 "mtime": instance.mtime,
8651 "ctime": instance.ctime,
8652 "uuid": instance.uuid,
8655 result[instance.name] = idict
8660 class LUSetInstanceParams(LogicalUnit):
8661 """Modifies an instances's parameters.
8664 HPATH = "instance-modify"
8665 HTYPE = constants.HTYPE_INSTANCE
8668 ("nics", _EmptyList, _TList),
8669 ("disks", _EmptyList, _TList),
8670 ("beparams", _EmptyDict, _TDict),
8671 ("hvparams", _EmptyDict, _TDict),
8672 ("disk_template", None, _TMaybeString),
8673 ("remote_node", None, _TMaybeString),
8674 ("os_name", None, _TMaybeString),
8675 ("force_variant", False, _TBool),
8676 ("osparams", None, _TOr(_TDict, _TNone)),
8681 def CheckArguments(self):
8682 if not (self.op.nics or self.op.disks or self.op.disk_template or
8683 self.op.hvparams or self.op.beparams or self.op.os_name):
8684 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8686 if self.op.hvparams:
8687 _CheckGlobalHvParams(self.op.hvparams)
8691 for disk_op, disk_dict in self.op.disks:
8692 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8693 if disk_op == constants.DDM_REMOVE:
8696 elif disk_op == constants.DDM_ADD:
8699 if not isinstance(disk_op, int):
8700 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8701 if not isinstance(disk_dict, dict):
8702 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8703 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8705 if disk_op == constants.DDM_ADD:
8706 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8707 if mode not in constants.DISK_ACCESS_SET:
8708 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8710 size = disk_dict.get('size', None)
8712 raise errors.OpPrereqError("Required disk parameter size missing",
8716 except (TypeError, ValueError), err:
8717 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8718 str(err), errors.ECODE_INVAL)
8719 disk_dict['size'] = size
8721 # modification of disk
8722 if 'size' in disk_dict:
8723 raise errors.OpPrereqError("Disk size change not possible, use"
8724 " grow-disk", errors.ECODE_INVAL)
8726 if disk_addremove > 1:
8727 raise errors.OpPrereqError("Only one disk add or remove operation"
8728 " supported at a time", errors.ECODE_INVAL)
8730 if self.op.disks and self.op.disk_template is not None:
8731 raise errors.OpPrereqError("Disk template conversion and other disk"
8732 " changes not supported at the same time",
8735 if self.op.disk_template:
8736 _CheckDiskTemplate(self.op.disk_template)
8737 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8738 self.op.remote_node is None):
8739 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8740 " one requires specifying a secondary node",
8745 for nic_op, nic_dict in self.op.nics:
8746 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8747 if nic_op == constants.DDM_REMOVE:
8750 elif nic_op == constants.DDM_ADD:
8753 if not isinstance(nic_op, int):
8754 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8755 if not isinstance(nic_dict, dict):
8756 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8757 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8759 # nic_dict should be a dict
8760 nic_ip = nic_dict.get('ip', None)
8761 if nic_ip is not None:
8762 if nic_ip.lower() == constants.VALUE_NONE:
8763 nic_dict['ip'] = None
8765 if not netutils.IsValidIP4(nic_ip):
8766 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8769 nic_bridge = nic_dict.get('bridge', None)
8770 nic_link = nic_dict.get('link', None)
8771 if nic_bridge and nic_link:
8772 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8773 " at the same time", errors.ECODE_INVAL)
8774 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8775 nic_dict['bridge'] = None
8776 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8777 nic_dict['link'] = None
8779 if nic_op == constants.DDM_ADD:
8780 nic_mac = nic_dict.get('mac', None)
8782 nic_dict['mac'] = constants.VALUE_AUTO
8784 if 'mac' in nic_dict:
8785 nic_mac = nic_dict['mac']
8786 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8787 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8789 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8790 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8791 " modifying an existing nic",
8794 if nic_addremove > 1:
8795 raise errors.OpPrereqError("Only one NIC add or remove operation"
8796 " supported at a time", errors.ECODE_INVAL)
8798 def ExpandNames(self):
8799 self._ExpandAndLockInstance()
8800 self.needed_locks[locking.LEVEL_NODE] = []
8801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8803 def DeclareLocks(self, level):
8804 if level == locking.LEVEL_NODE:
8805 self._LockInstancesNodes()
8806 if self.op.disk_template and self.op.remote_node:
8807 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8808 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8810 def BuildHooksEnv(self):
8813 This runs on the master, primary and secondaries.
8817 if constants.BE_MEMORY in self.be_new:
8818 args['memory'] = self.be_new[constants.BE_MEMORY]
8819 if constants.BE_VCPUS in self.be_new:
8820 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8821 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8822 # information at all.
8825 nic_override = dict(self.op.nics)
8826 for idx, nic in enumerate(self.instance.nics):
8827 if idx in nic_override:
8828 this_nic_override = nic_override[idx]
8830 this_nic_override = {}
8831 if 'ip' in this_nic_override:
8832 ip = this_nic_override['ip']
8835 if 'mac' in this_nic_override:
8836 mac = this_nic_override['mac']
8839 if idx in self.nic_pnew:
8840 nicparams = self.nic_pnew[idx]
8842 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8843 mode = nicparams[constants.NIC_MODE]
8844 link = nicparams[constants.NIC_LINK]
8845 args['nics'].append((ip, mac, mode, link))
8846 if constants.DDM_ADD in nic_override:
8847 ip = nic_override[constants.DDM_ADD].get('ip', None)
8848 mac = nic_override[constants.DDM_ADD]['mac']
8849 nicparams = self.nic_pnew[constants.DDM_ADD]
8850 mode = nicparams[constants.NIC_MODE]
8851 link = nicparams[constants.NIC_LINK]
8852 args['nics'].append((ip, mac, mode, link))
8853 elif constants.DDM_REMOVE in nic_override:
8854 del args['nics'][-1]
8856 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8857 if self.op.disk_template:
8858 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8859 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8862 def CheckPrereq(self):
8863 """Check prerequisites.
8865 This only checks the instance list against the existing names.
8868 # checking the new params on the primary/secondary nodes
8870 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8871 cluster = self.cluster = self.cfg.GetClusterInfo()
8872 assert self.instance is not None, \
8873 "Cannot retrieve locked instance %s" % self.op.instance_name
8874 pnode = instance.primary_node
8875 nodelist = list(instance.all_nodes)
8878 if self.op.os_name and not self.op.force:
8879 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8880 self.op.force_variant)
8881 instance_os = self.op.os_name
8883 instance_os = instance.os
8885 if self.op.disk_template:
8886 if instance.disk_template == self.op.disk_template:
8887 raise errors.OpPrereqError("Instance already has disk template %s" %
8888 instance.disk_template, errors.ECODE_INVAL)
8890 if (instance.disk_template,
8891 self.op.disk_template) not in self._DISK_CONVERSIONS:
8892 raise errors.OpPrereqError("Unsupported disk template conversion from"
8893 " %s to %s" % (instance.disk_template,
8894 self.op.disk_template),
8896 _CheckInstanceDown(self, instance, "cannot change disk template")
8897 if self.op.disk_template in constants.DTS_NET_MIRROR:
8898 if self.op.remote_node == pnode:
8899 raise errors.OpPrereqError("Given new secondary node %s is the same"
8900 " as the primary node of the instance" %
8901 self.op.remote_node, errors.ECODE_STATE)
8902 _CheckNodeOnline(self, self.op.remote_node)
8903 _CheckNodeNotDrained(self, self.op.remote_node)
8904 disks = [{"size": d.size} for d in instance.disks]
8905 required = _ComputeDiskSize(self.op.disk_template, disks)
8906 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8908 # hvparams processing
8909 if self.op.hvparams:
8910 hv_type = instance.hypervisor
8911 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8912 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8913 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8916 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8917 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8918 self.hv_new = hv_new # the new actual values
8919 self.hv_inst = i_hvdict # the new dict (without defaults)
8921 self.hv_new = self.hv_inst = {}
8923 # beparams processing
8924 if self.op.beparams:
8925 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8927 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8928 be_new = cluster.SimpleFillBE(i_bedict)
8929 self.be_new = be_new # the new actual values
8930 self.be_inst = i_bedict # the new dict (without defaults)
8932 self.be_new = self.be_inst = {}
8934 # osparams processing
8935 if self.op.osparams:
8936 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8937 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8938 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8939 self.os_inst = i_osdict # the new dict (without defaults)
8941 self.os_new = self.os_inst = {}
8945 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8946 mem_check_list = [pnode]
8947 if be_new[constants.BE_AUTO_BALANCE]:
8948 # either we changed auto_balance to yes or it was already set before
8949 mem_check_list.extend(instance.secondary_nodes)
8950 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8951 instance.hypervisor)
8952 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8953 instance.hypervisor)
8954 pninfo = nodeinfo[pnode]
8955 msg = pninfo.fail_msg
8957 # Assume the primary node is unreachable and go ahead
8958 self.warn.append("Can't get info from primary node %s: %s" %
8960 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8961 self.warn.append("Node data from primary node %s doesn't contain"
8962 " free memory information" % pnode)
8963 elif instance_info.fail_msg:
8964 self.warn.append("Can't get instance runtime information: %s" %
8965 instance_info.fail_msg)
8967 if instance_info.payload:
8968 current_mem = int(instance_info.payload['memory'])
8970 # Assume instance not running
8971 # (there is a slight race condition here, but it's not very probable,
8972 # and we have no other way to check)
8974 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8975 pninfo.payload['memory_free'])
8977 raise errors.OpPrereqError("This change will prevent the instance"
8978 " from starting, due to %d MB of memory"
8979 " missing on its primary node" % miss_mem,
8982 if be_new[constants.BE_AUTO_BALANCE]:
8983 for node, nres in nodeinfo.items():
8984 if node not in instance.secondary_nodes:
8988 self.warn.append("Can't get info from secondary node %s: %s" %
8990 elif not isinstance(nres.payload.get('memory_free', None), int):
8991 self.warn.append("Secondary node %s didn't return free"
8992 " memory information" % node)
8993 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8994 self.warn.append("Not enough memory to failover instance to"
8995 " secondary node %s" % node)
9000 for nic_op, nic_dict in self.op.nics:
9001 if nic_op == constants.DDM_REMOVE:
9002 if not instance.nics:
9003 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9006 if nic_op != constants.DDM_ADD:
9008 if not instance.nics:
9009 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9010 " no NICs" % nic_op,
9012 if nic_op < 0 or nic_op >= len(instance.nics):
9013 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9015 (nic_op, len(instance.nics) - 1),
9017 old_nic_params = instance.nics[nic_op].nicparams
9018 old_nic_ip = instance.nics[nic_op].ip
9023 update_params_dict = dict([(key, nic_dict[key])
9024 for key in constants.NICS_PARAMETERS
9025 if key in nic_dict])
9027 if 'bridge' in nic_dict:
9028 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9030 new_nic_params = _GetUpdatedParams(old_nic_params,
9032 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9033 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9034 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9035 self.nic_pinst[nic_op] = new_nic_params
9036 self.nic_pnew[nic_op] = new_filled_nic_params
9037 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9039 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9040 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9041 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9043 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9045 self.warn.append(msg)
9047 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9048 if new_nic_mode == constants.NIC_MODE_ROUTED:
9049 if 'ip' in nic_dict:
9050 nic_ip = nic_dict['ip']
9054 raise errors.OpPrereqError('Cannot set the nic ip to None'
9055 ' on a routed nic', errors.ECODE_INVAL)
9056 if 'mac' in nic_dict:
9057 nic_mac = nic_dict['mac']
9059 raise errors.OpPrereqError('Cannot set the nic mac to None',
9061 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9062 # otherwise generate the mac
9063 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9065 # or validate/reserve the current one
9067 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9068 except errors.ReservationError:
9069 raise errors.OpPrereqError("MAC address %s already in use"
9070 " in cluster" % nic_mac,
9071 errors.ECODE_NOTUNIQUE)
9074 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9075 raise errors.OpPrereqError("Disk operations not supported for"
9076 " diskless instances",
9078 for disk_op, _ in self.op.disks:
9079 if disk_op == constants.DDM_REMOVE:
9080 if len(instance.disks) == 1:
9081 raise errors.OpPrereqError("Cannot remove the last disk of"
9082 " an instance", errors.ECODE_INVAL)
9083 _CheckInstanceDown(self, instance, "cannot remove disks")
9085 if (disk_op == constants.DDM_ADD and
9086 len(instance.disks) >= constants.MAX_DISKS):
9087 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9088 " add more" % constants.MAX_DISKS,
9090 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9092 if disk_op < 0 or disk_op >= len(instance.disks):
9093 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9095 (disk_op, len(instance.disks)),
9100 def _ConvertPlainToDrbd(self, feedback_fn):
9101 """Converts an instance from plain to drbd.
9104 feedback_fn("Converting template to drbd")
9105 instance = self.instance
9106 pnode = instance.primary_node
9107 snode = self.op.remote_node
9109 # create a fake disk info for _GenerateDiskTemplate
9110 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9111 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9112 instance.name, pnode, [snode],
9113 disk_info, None, None, 0)
9114 info = _GetInstanceInfoText(instance)
9115 feedback_fn("Creating aditional volumes...")
9116 # first, create the missing data and meta devices
9117 for disk in new_disks:
9118 # unfortunately this is... not too nice
9119 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9121 for child in disk.children:
9122 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9123 # at this stage, all new LVs have been created, we can rename the old ones
9125 feedback_fn("Renaming original volumes...")
9126 rename_list = [(o, n.children[0].logical_id)
9127 for (o, n) in zip(instance.disks, new_disks)]
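# rename_list pairs each existing plain LV with the logical_id of the data
# child of its new DRBD disk, so the old LV takes over the name expected by
# the new DRBD device.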
9128 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9129 result.Raise("Failed to rename original LVs")
9131 feedback_fn("Initializing DRBD devices...")
9132 # all child devices are in place, we can now create the DRBD devices
9133 for disk in new_disks:
9134 for node in [pnode, snode]:
9135 f_create = node == pnode
9136 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9138 # at this point, the instance has been modified
9139 instance.disk_template = constants.DT_DRBD8
9140 instance.disks = new_disks
9141 self.cfg.Update(instance, feedback_fn)
9143 # disks are created, waiting for sync
9144 disk_abort = not _WaitForSync(self, instance)
9146 raise errors.OpExecError("There are some degraded disks for"
9147 " this instance, please cleanup manually")
9149 def _ConvertDrbdToPlain(self, feedback_fn):
9150 """Converts an instance from drbd to plain.
9153 instance = self.instance
9154 assert len(instance.secondary_nodes) == 1
9155 pnode = instance.primary_node
9156 snode = instance.secondary_nodes[0]
9157 feedback_fn("Converting template to plain")
9159 old_disks = instance.disks
9160 new_disks = [d.children[0] for d in old_disks]
9162 # copy over size and mode
9163 for parent, child in zip(old_disks, new_disks):
9164 child.size = parent.size
9165 child.mode = parent.mode
9167 # update instance structure
9168 instance.disks = new_disks
9169 instance.disk_template = constants.DT_PLAIN
9170 self.cfg.Update(instance, feedback_fn)
9172 feedback_fn("Removing volumes on the secondary node...")
9173 for disk in old_disks:
9174 self.cfg.SetDiskID(disk, snode)
9175 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9177 self.LogWarning("Could not remove block device %s on node %s,"
9178 " continuing anyway: %s", disk.iv_name, snode, msg)
9180 feedback_fn("Removing unneeded volumes on the primary node...")
9181 for idx, disk in enumerate(old_disks):
9182 meta = disk.children[1]
9183 self.cfg.SetDiskID(meta, pnode)
9184 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9186 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9187 " continuing anyway: %s", idx, pnode, msg)
9190 def Exec(self, feedback_fn):
9191 """Modifies an instance.
9193 All parameters take effect only at the next restart of the instance.
9196 # Process here the warnings from CheckPrereq, as we don't have a
9197 # feedback_fn there.
9198 for warn in self.warn:
9199 feedback_fn("WARNING: %s" % warn)
9202 instance = self.instance
9204 for disk_op, disk_dict in self.op.disks:
9205 if disk_op == constants.DDM_REMOVE:
9206 # remove the last disk
9207 device = instance.disks.pop()
9208 device_idx = len(instance.disks)
9209 for node, disk in device.ComputeNodeTree(instance.primary_node):
9210 self.cfg.SetDiskID(disk, node)
9211 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9213 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9214 " continuing anyway", device_idx, node, msg)
9215 result.append(("disk/%d" % device_idx, "remove"))
9216 elif disk_op == constants.DDM_ADD:
9218 if instance.disk_template == constants.DT_FILE:
9219 file_driver, file_path = instance.disks[0].logical_id
9220 file_path = os.path.dirname(file_path)
9222 file_driver = file_path = None
9223 disk_idx_base = len(instance.disks)
9224 new_disk = _GenerateDiskTemplate(self,
9225 instance.disk_template,
9226 instance.name, instance.primary_node,
9227 instance.secondary_nodes,
9232 instance.disks.append(new_disk)
9233 info = _GetInstanceInfoText(instance)
9235 logging.info("Creating volume %s for instance %s",
9236 new_disk.iv_name, instance.name)
9237 # Note: this needs to be kept in sync with _CreateDisks
9239 for node in instance.all_nodes:
9240 f_create = node == instance.primary_node
9242 _CreateBlockDev(self, node, instance, new_disk,
9243 f_create, info, f_create)
9244 except errors.OpExecError, err:
9245 self.LogWarning("Failed to create volume %s (%s) on"
9247 new_disk.iv_name, new_disk, node, err)
9248 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9249 (new_disk.size, new_disk.mode)))
9251 # change a given disk
9252 instance.disks[disk_op].mode = disk_dict['mode']
9253 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9255 if self.op.disk_template:
9256 r_shut = _ShutdownInstanceDisks(self, instance)
9258 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9259 " proceed with disk template conversion")
9260 mode = (instance.disk_template, self.op.disk_template)
9262 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9264 self.cfg.ReleaseDRBDMinors(instance.name)
9266 result.append(("disk_template", self.op.disk_template))
9269 for nic_op, nic_dict in self.op.nics:
9270 if nic_op == constants.DDM_REMOVE:
9271 # remove the last nic
9272 del instance.nics[-1]
9273 result.append(("nic.%d" % len(instance.nics), "remove"))
9274 elif nic_op == constants.DDM_ADD:
9275 # mac and bridge should be set, by now
9276 mac = nic_dict['mac']
9277 ip = nic_dict.get('ip', None)
9278 nicparams = self.nic_pinst[constants.DDM_ADD]
9279 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9280 instance.nics.append(new_nic)
9281 result.append(("nic.%d" % (len(instance.nics) - 1),
9282 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9283 (new_nic.mac, new_nic.ip,
9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9285 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9288 for key in 'mac', 'ip':
9290 setattr(instance.nics[nic_op], key, nic_dict[key])
9291 if nic_op in self.nic_pinst:
9292 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9293 for key, val in nic_dict.iteritems():
9294 result.append(("nic.%s/%d" % (key, nic_op), val))
9297 if self.op.hvparams:
9298 instance.hvparams = self.hv_inst
9299 for key, val in self.op.hvparams.iteritems():
9300 result.append(("hv/%s" % key, val))
9303 if self.op.beparams:
9304 instance.beparams = self.be_inst
9305 for key, val in self.op.beparams.iteritems():
9306 result.append(("be/%s" % key, val))
9310 instance.os = self.op.os_name
9313 if self.op.osparams:
9314 instance.osparams = self.os_inst
9315 for key, val in self.op.osparams.iteritems():
9316 result.append(("os/%s" % key, val))
9318 self.cfg.Update(instance, feedback_fn)
9322 _DISK_CONVERSIONS = {
9323 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9324 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9328 class LUQueryExports(NoHooksLU):
9329 """Query the exports list
9333 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9334 ("use_locking", False, _TBool),
9338 def ExpandNames(self):
9339 self.needed_locks = {}
9340 self.share_locks[locking.LEVEL_NODE] = 1
9341 if not self.op.nodes:
9342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9344 self.needed_locks[locking.LEVEL_NODE] = \
9345 _GetWantedNodes(self, self.op.nodes)
9347 def Exec(self, feedback_fn):
9348 """Compute the list of all the exported system images.
9351 @return: a dictionary with the structure node->(export-list)
9352 where export-list is a list of the instances exported on that node
9356 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9357 rpcresult = self.rpc.call_export_list(self.nodes)
9359 for node in rpcresult:
9360 if rpcresult[node].fail_msg:
9361 result[node] = False
9363 result[node] = rpcresult[node].payload
9368 class LUPrepareExport(NoHooksLU):
9369 """Prepares an instance for an export and returns useful information.
9374 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9378 def ExpandNames(self):
9379 self._ExpandAndLockInstance()
9381 def CheckPrereq(self):
9382 """Check prerequisites.
9385 instance_name = self.op.instance_name
9387 self.instance = self.cfg.GetInstanceInfo(instance_name)
9388 assert self.instance is not None, \
9389 "Cannot retrieve locked instance %s" % self.op.instance_name
9390 _CheckNodeOnline(self, self.instance.primary_node)
9392 self._cds = _GetClusterDomainSecret()
9394 def Exec(self, feedback_fn):
9395 """Prepares an instance for an export.
9398 instance = self.instance
9400 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9401 salt = utils.GenerateSecret(8)
9403 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9404 result = self.rpc.call_x509_cert_create(instance.primary_node,
9405 constants.RIE_CERT_VALIDITY)
9406 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9408 (name, cert_pem) = result.payload
9410 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9414 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9415 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9417 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9423 class LUExportInstance(LogicalUnit):
9424 """Export an instance to an image in the cluster.
9427 HPATH = "instance-export"
9428 HTYPE = constants.HTYPE_INSTANCE
9431 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9432 ("shutdown", True, _TBool),
9434 ("remove_instance", False, _TBool),
9435 ("ignore_remove_failures", False, _TBool),
9436 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9437 ("x509_key_name", None, _TOr(_TList, _TNone)),
9438 ("destination_x509_ca", None, _TMaybeString),
9442 def CheckArguments(self):
9443 """Check the arguments.
9446 self.x509_key_name = self.op.x509_key_name
9447 self.dest_x509_ca_pem = self.op.destination_x509_ca
9449 if self.op.remove_instance and not self.op.shutdown:
9450 raise errors.OpPrereqError("Can not remove instance without shutting it"
9453 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9454 if not self.x509_key_name:
9455 raise errors.OpPrereqError("Missing X509 key name for encryption",
9458 if not self.dest_x509_ca_pem:
9459 raise errors.OpPrereqError("Missing destination X509 CA",
9462 def ExpandNames(self):
9463 self._ExpandAndLockInstance()
9465 # Lock all nodes for local exports
9466 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9467 # FIXME: lock only instance primary and destination node
9469 # Sad but true, for now we have to lock all nodes, as we don't know where
9470 # the previous export might be, and in this LU we search for it and
9471 # remove it from its current node. In the future we could fix this by:
9472 # - making a tasklet to search (share-lock all), then create the
9473 # new one, then one to remove, after
9474 # - removing the removal operation altogether
9475 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477 def DeclareLocks(self, level):
9478 """Last minute lock declaration."""
9479 # All nodes are locked anyway, so nothing to do here.
9481 def BuildHooksEnv(self):
9484 This will run on the master, primary node and target node.
9488 "EXPORT_MODE": self.op.mode,
9489 "EXPORT_NODE": self.op.target_node,
9490 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9491 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9492 # TODO: Generic function for boolean env variables
9493 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9496 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9498 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9500 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9501 nl.append(self.op.target_node)
9505 def CheckPrereq(self):
9506 """Check prerequisites.
9508 This checks that the instance and node names are valid.
9511 instance_name = self.op.instance_name
9513 self.instance = self.cfg.GetInstanceInfo(instance_name)
9514 assert self.instance is not None, \
9515 "Cannot retrieve locked instance %s" % self.op.instance_name
9516 _CheckNodeOnline(self, self.instance.primary_node)
9518 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9519 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9520 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9521 assert self.dst_node is not None
9523 _CheckNodeOnline(self, self.dst_node.name)
9524 _CheckNodeNotDrained(self, self.dst_node.name)
9527 self.dest_disk_info = None
9528 self.dest_x509_ca = None
9530 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9531 self.dst_node = None
9533 if len(self.op.target_node) != len(self.instance.disks):
9534 raise errors.OpPrereqError(("Received destination information for %s"
9535 " disks, but instance %s has %s disks") %
9536 (len(self.op.target_node), instance_name,
9537 len(self.instance.disks)),
9540 cds = _GetClusterDomainSecret()
9542 # Check X509 key name
9544 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9545 except (TypeError, ValueError), err:
9546 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9548 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9549 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9552 # Load and verify CA
9554 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9555 except OpenSSL.crypto.Error, err:
9556 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9557 (err, ), errors.ECODE_INVAL)
9559 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9560 if errcode is not None:
9561 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9562 (msg, ), errors.ECODE_INVAL)
9564 self.dest_x509_ca = cert
9566 # Verify target information
9568 for idx, disk_data in enumerate(self.op.target_node):
9570 (host, port, magic) = \
9571 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9572 except errors.GenericError, err:
9573 raise errors.OpPrereqError("Target info for disk %s: %s" %
9574 (idx, err), errors.ECODE_INVAL)
9576 disk_info.append((host, port, magic))
9578 assert len(disk_info) == len(self.op.target_node)
9579 self.dest_disk_info = disk_info
9582 raise errors.ProgrammerError("Unhandled export mode %r" %
9585 # instance disk type verification
9586 # TODO: Implement export support for file-based disks
9587 for disk in self.instance.disks:
9588 if disk.dev_type == constants.LD_FILE:
9589 raise errors.OpPrereqError("Export not supported for instances with"
9590 " file-based disks", errors.ECODE_INVAL)
9592 def _CleanupExports(self, feedback_fn):
9593 """Removes exports of current instance from all other nodes.
9595 If an instance in a cluster with nodes A..D was exported to node C, its
9596 exports will be removed from the nodes A, B and D.
9599 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9601 nodelist = self.cfg.GetNodeList()
9602 nodelist.remove(self.dst_node.name)
9604 # on one-node clusters nodelist will be empty after the removal
9605 # if we proceed the backup would be removed because OpQueryExports
9606 # substitutes an empty list with the full cluster node list.
9607 iname = self.instance.name
9609 feedback_fn("Removing old exports for instance %s" % iname)
9610 exportlist = self.rpc.call_export_list(nodelist)
9611 for node in exportlist:
9612 if exportlist[node].fail_msg:
9614 if iname in exportlist[node].payload:
9615 msg = self.rpc.call_export_remove(node, iname).fail_msg
9617 self.LogWarning("Could not remove older export for instance %s"
9618 " on node %s: %s", iname, node, msg)
9620 def Exec(self, feedback_fn):
9621 """Export an instance to an image in the cluster.
9624 assert self.op.mode in constants.EXPORT_MODES
9626 instance = self.instance
9627 src_node = instance.primary_node
9629 if self.op.shutdown:
9630       # shut down the instance, but not the disks
9631 feedback_fn("Shutting down instance %s" % instance.name)
9632 result = self.rpc.call_instance_shutdown(src_node, instance,
9633 self.op.shutdown_timeout)
9634 # TODO: Maybe ignore failures if ignore_remove_failures is set
9635 result.Raise("Could not shutdown instance %s on"
9636 " node %s" % (instance.name, src_node))
9638 # set the disks ID correctly since call_instance_start needs the
9639 # correct drbd minor to create the symlinks
9640 for disk in instance.disks:
9641 self.cfg.SetDiskID(disk, src_node)
9643 activate_disks = (not instance.admin_up)
9646       # Activate the instance disks if we're exporting a stopped instance
9647 feedback_fn("Activating disks for %s" % instance.name)
9648 _StartInstanceDisks(self, instance, None)
9651 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9654 helper.CreateSnapshots()
9656 if (self.op.shutdown and instance.admin_up and
9657 not self.op.remove_instance):
9658 assert not activate_disks
9659 feedback_fn("Starting instance %s" % instance.name)
9660 result = self.rpc.call_instance_start(src_node, instance, None, None)
9661 msg = result.fail_msg
9663 feedback_fn("Failed to start instance: %s" % msg)
9664 _ShutdownInstanceDisks(self, instance)
9665 raise errors.OpExecError("Could not start instance: %s" % msg)
9667 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9668 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9669 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9670 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9671 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9673 (key_name, _, _) = self.x509_key_name
9676 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9679 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9680 key_name, dest_ca_pem,
9685 # Check for backwards compatibility
9686 assert len(dresults) == len(instance.disks)
9687 assert compat.all(isinstance(i, bool) for i in dresults), \
9688 "Not all results are boolean: %r" % dresults
9692 feedback_fn("Deactivating disks for %s" % instance.name)
9693 _ShutdownInstanceDisks(self, instance)
9695 if not (compat.all(dresults) and fin_resu):
9698 failures.append("export finalization")
9699 if not compat.all(dresults):
9700 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9702 failures.append("disk export: disk(s) %s" % fdsk)
9704 raise errors.OpExecError("Export failed, errors in %s" %
9705 utils.CommaJoin(failures))
9707 # At this point, the export was successful, we can cleanup/finish
9709 # Remove instance if requested
9710 if self.op.remove_instance:
9711 feedback_fn("Removing instance %s" % instance.name)
9712 _RemoveInstance(self, feedback_fn, instance,
9713 self.op.ignore_remove_failures)
9715 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9716 self._CleanupExports(feedback_fn)
9718 return fin_resu, dresults
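# Illustrative note (example only, not part of the original module): the
# (fin_resu, dresults) tuple returned above carries the overall finalization
# result plus one boolean per instance disk, e.g. (True, [True, True]) for a
# fully successful export of a two-disk instance.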
9721 class LURemoveExport(NoHooksLU):
9722 """Remove exports related to the named instance.
9730 def ExpandNames(self):
9731 self.needed_locks = {}
9732 # We need all nodes to be locked in order for RemoveExport to work, but we
9733 # don't need to lock the instance itself, as nothing will happen to it (and
9734     # we can also remove exports for a removed instance)
9735 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9737 def Exec(self, feedback_fn):
9738 """Remove any export.
9741 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9742 # If the instance was not found we'll try with the name that was passed in.
9743 # This will only work if it was an FQDN, though.
9745 if not instance_name:
9747 instance_name = self.op.instance_name
9749 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9750 exportlist = self.rpc.call_export_list(locked_nodes)
9752 for node in exportlist:
9753 msg = exportlist[node].fail_msg
9755 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9757 if instance_name in exportlist[node].payload:
9759 result = self.rpc.call_export_remove(node, instance_name)
9760 msg = result.fail_msg
9762 logging.error("Could not remove export for instance %s"
9763 " on node %s: %s", instance_name, node, msg)
9765 if fqdn_warn and not found:
9766 feedback_fn("Export not found. If trying to remove an export belonging"
9767 " to a deleted instance please use its Fully Qualified"
9771 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9774   This is an abstract class which is the parent of all the other tag LUs.
9778 def ExpandNames(self):
9779 self.needed_locks = {}
9780 if self.op.kind == constants.TAG_NODE:
9781 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9782 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9783 elif self.op.kind == constants.TAG_INSTANCE:
9784 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9785 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9787 def CheckPrereq(self):
9788 """Check prerequisites.
9791 if self.op.kind == constants.TAG_CLUSTER:
9792 self.target = self.cfg.GetClusterInfo()
9793 elif self.op.kind == constants.TAG_NODE:
9794 self.target = self.cfg.GetNodeInfo(self.op.name)
9795 elif self.op.kind == constants.TAG_INSTANCE:
9796 self.target = self.cfg.GetInstanceInfo(self.op.name)
9798 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9799 str(self.op.kind), errors.ECODE_INVAL)
9802 class LUGetTags(TagsLU):
9803 """Returns the tags of a given object.
9807 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9808 # Name is only meaningful for nodes and instances
9809 ("name", _NoDefault, _TMaybeString),
9813 def Exec(self, feedback_fn):
9814 """Returns the tag list.
9817 return list(self.target.GetTags())
9820 class LUSearchTags(NoHooksLU):
9821 """Searches the tags for a given pattern.
9825 ("pattern", _NoDefault, _TNonEmptyString),
9829 def ExpandNames(self):
9830 self.needed_locks = {}
9832 def CheckPrereq(self):
9833 """Check prerequisites.
9835 This checks the pattern passed for validity by compiling it.
9839 self.re = re.compile(self.op.pattern)
9840 except re.error, err:
9841 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9842 (self.op.pattern, err), errors.ECODE_INVAL)
9844 def Exec(self, feedback_fn):
9845 """Returns the tag list.
9849 tgts = [("/cluster", cfg.GetClusterInfo())]
9850 ilist = cfg.GetAllInstancesInfo().values()
9851 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9852 nlist = cfg.GetAllNodesInfo().values()
9853 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9855 for path, target in tgts:
9856 for tag in target.GetTags():
9857 if self.re.search(tag):
9858 results.append((path, tag))
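# Illustrative sketch (example only, not part of the original module): the same
# kind of regex-based tag matching that LUSearchTags.Exec performs above, shown
# on plain data to make clear that the pattern is a full regular expression
# matched with search(). The helper name and its arguments are hypothetical.
def _ExampleSearchTags(pattern, tags_by_path):
  """Example only: tags_by_path maps an object path to an iterable of tags."""
  rex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tags_by_path.items()
          for tag in tags
          if rex.search(tag)]

# e.g. _ExampleSearchTags("^db", {"/instances/i1": ["db-master", "public-db"]})
# only returns [("/instances/i1", "db-master")].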
9862 class LUAddTags(TagsLU):
9863 """Sets a tag on a given object.
9867 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9868 # Name is only meaningful for nodes and instances
9869 ("name", _NoDefault, _TMaybeString),
9870 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9874 def CheckPrereq(self):
9875 """Check prerequisites.
9877 This checks the type and length of the tag name and value.
9880 TagsLU.CheckPrereq(self)
9881 for tag in self.op.tags:
9882 objects.TaggableObject.ValidateTag(tag)
9884 def Exec(self, feedback_fn):
9889 for tag in self.op.tags:
9890 self.target.AddTag(tag)
9891 except errors.TagError, err:
9892 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9893 self.cfg.Update(self.target, feedback_fn)
9896 class LUDelTags(TagsLU):
9897 """Delete a list of tags from a given object.
9901 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9902 # Name is only meaningful for nodes and instances
9903 ("name", _NoDefault, _TMaybeString),
9904 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9908 def CheckPrereq(self):
9909 """Check prerequisites.
9911 This checks that we have the given tag.
9914 TagsLU.CheckPrereq(self)
9915 for tag in self.op.tags:
9916 objects.TaggableObject.ValidateTag(tag)
9917 del_tags = frozenset(self.op.tags)
9918 cur_tags = self.target.GetTags()
9919 if not del_tags <= cur_tags:
9920 diff_tags = del_tags - cur_tags
9921 diff_names = ["'%s'" % tag for tag in diff_tags]
9923 raise errors.OpPrereqError("Tag(s) %s not found" %
9924 (",".join(diff_names)), errors.ECODE_NOENT)
9926 def Exec(self, feedback_fn):
9927 """Remove the tag from the object.
9930 for tag in self.op.tags:
9931 self.target.RemoveTag(tag)
9932 self.cfg.Update(self.target, feedback_fn)
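# Illustrative note (example only, not part of the original module): because of
# the subset check in LUDelTags.CheckPrereq, the whole request is rejected if
# any requested tag is absent; e.g. deleting ["a", "b"] when only "a" is set
# fails with "Tag(s) 'b' not found" instead of silently removing "a".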
9935 class LUTestDelay(NoHooksLU):
9936 """Sleep for a specified amount of time.
9938   This LU sleeps on the master and/or nodes for a specified amount of time.
9943 ("duration", _NoDefault, _TFloat),
9944 ("on_master", True, _TBool),
9945 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9946 ("repeat", 0, _TPositiveInt)
9950 def ExpandNames(self):
9951 """Expand names and set required locks.
9953 This expands the node list, if any.
9956 self.needed_locks = {}
9957 if self.op.on_nodes:
9958       # _GetWantedNodes can be used here, but is not always appropriate to use
9959       # this way in ExpandNames; check the LogicalUnit.ExpandNames docstring for details.
9961 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9962 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9964 def _TestDelay(self):
9965 """Do the actual sleep.
9968 if self.op.on_master:
9969 if not utils.TestDelay(self.op.duration):
9970 raise errors.OpExecError("Error during master delay test")
9971 if self.op.on_nodes:
9972 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9973 for node, node_result in result.items():
9974 node_result.Raise("Failure during rpc call to node %s" % node)
9976 def Exec(self, feedback_fn):
9977 """Execute the test delay opcode, with the wanted repetitions.
9980 if self.op.repeat == 0:
9983 top_value = self.op.repeat - 1
9984 for i in range(self.op.repeat):
9985 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9989 class LUTestJobqueue(NoHooksLU):
9990 """Utility LU to test some aspects of the job queue.
9994 ("notify_waitlock", False, _TBool),
9995 ("notify_exec", False, _TBool),
9996 ("log_messages", _EmptyList, _TListOf(_TString)),
9997 ("fail", False, _TBool),
10001 # Must be lower than default timeout for WaitForJobChange to see whether it
10002 # notices changed jobs
10003 _CLIENT_CONNECT_TIMEOUT = 20.0
10004 _CLIENT_CONFIRM_TIMEOUT = 60.0
10007 def _NotifyUsingSocket(cls, cb, errcls):
10008 """Opens a Unix socket and waits for another program to connect.
10011 @param cb: Callback to send socket name to client
10012 @type errcls: class
10013 @param errcls: Exception class to use for errors
10016 # Using a temporary directory as there's no easy way to create temporary
10017 # sockets without writing a custom loop around tempfile.mktemp and
10019 tmpdir = tempfile.mkdtemp()
10021 tmpsock = utils.PathJoin(tmpdir, "sock")
10023 logging.debug("Creating temporary socket at %s", tmpsock)
10024 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10029 # Send details to client
10032 # Wait for client to connect before continuing
10033 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10035 (conn, _) = sock.accept()
10036 except socket.error, err:
10037 raise errcls("Client didn't connect in time (%s)" % err)
10041 # Remove as soon as client is connected
10042 shutil.rmtree(tmpdir)
10044 # Wait for client to close
10047 # pylint: disable-msg=E1101
10048 # Instance of '_socketobject' has no ... member
10049 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10051 except socket.error, err:
10052 raise errcls("Client failed to confirm notification (%s)" % err)
10056 def _SendNotification(self, test, arg, sockname):
10057 """Sends a notification to the client.
10060 @param test: Test name
10061 @param arg: Test argument (depends on test)
10062 @type sockname: string
10063 @param sockname: Socket path
10066 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10068 def _Notify(self, prereq, test, arg):
10069 """Notifies the client of a test.
10072 @param prereq: Whether this is a prereq-phase test
10074 @param test: Test name
10075 @param arg: Test argument (depends on test)
10079 errcls = errors.OpPrereqError
10081 errcls = errors.OpExecError
10083 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10087 def CheckArguments(self):
10088 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10089 self.expandnames_calls = 0
10091 def ExpandNames(self):
10092 checkargs_calls = getattr(self, "checkargs_calls", 0)
10093 if checkargs_calls < 1:
10094 raise errors.ProgrammerError("CheckArguments was not called")
10096 self.expandnames_calls += 1
10098 if self.op.notify_waitlock:
10099 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10101 self.LogInfo("Expanding names")
10103 # Get lock on master node (just to get a lock, not for a particular reason)
10104 self.needed_locks = {
10105 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10108 def Exec(self, feedback_fn):
10109 if self.expandnames_calls < 1:
10110 raise errors.ProgrammerError("ExpandNames was not called")
10112 if self.op.notify_exec:
10113 self._Notify(False, constants.JQT_EXEC, None)
10115 self.LogInfo("Executing")
10117 if self.op.log_messages:
10118 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10119 for idx, msg in enumerate(self.op.log_messages):
10120 self.LogInfo("Sending log message %s", idx + 1)
10121 feedback_fn(constants.JQT_MSGPREFIX + msg)
10122 # Report how many test messages have been sent
10123 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10126 raise errors.OpExecError("Opcode failure was requested")
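# Illustrative sketch (example only, not part of the original module): what a
# test client on the receiving end of _SendNotification is expected to do with
# the socket path it receives -- connect to the Unix socket, which unblocks the
# accept() in _NotifyUsingSocket, and close the connection once it has seen the
# corresponding job change. The function name and the bare connect/close
# handshake are assumptions for illustration only.
def _ExampleJobqueueTestClient(sockname):
  """Example only: connects to the notification socket and disconnects."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)
  finally:
    sock.close()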
10131 class IAllocator(object):
10132 """IAllocator framework.
10134   An IAllocator instance has four sets of attributes:
10135 - cfg that is needed to query the cluster
10136 - input data (all members of the _KEYS class attribute are required)
10137 - four buffer attributes (in|out_data|text), that represent the
10138 input (to the external script) in text and data structure format,
10139 and the output from it, again in two formats
10140     - the result variables from the script (success, info, nodes) for easy usage
10144 # pylint: disable-msg=R0902
10145 # lots of instance attributes
10147 "name", "mem_size", "disks", "disk_template",
10148 "os", "tags", "nics", "vcpus", "hypervisor",
10151 "name", "relocate_from",
10157 def __init__(self, cfg, rpc, mode, **kwargs):
10160 # init buffer variables
10161 self.in_text = self.out_text = self.in_data = self.out_data = None
10162 # init all input fields so that pylint is happy
10164 self.mem_size = self.disks = self.disk_template = None
10165 self.os = self.tags = self.nics = self.vcpus = None
10166 self.hypervisor = None
10167 self.relocate_from = None
10169 self.evac_nodes = None
10171 self.required_nodes = None
10172 # init result fields
10173 self.success = self.info = self.result = None
10174 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10175 keyset = self._ALLO_KEYS
10176 fn = self._AddNewInstance
10177 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10178 keyset = self._RELO_KEYS
10179 fn = self._AddRelocateInstance
10180 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10181 keyset = self._EVAC_KEYS
10182 fn = self._AddEvacuateNodes
10184 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10185 " IAllocator" % self.mode)
10187 if key not in keyset:
10188 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10189 " IAllocator" % key)
10190 setattr(self, key, kwargs[key])
10193 if key not in kwargs:
10194 raise errors.ProgrammerError("Missing input parameter '%s' to"
10195 " IAllocator" % key)
10196 self._BuildInputData(fn)
10198 def _ComputeClusterData(self):
10199 """Compute the generic allocator input data.
10201 This is the data that is independent of the actual operation.
10205 cluster_info = cfg.GetClusterInfo()
10208 "version": constants.IALLOCATOR_VERSION,
10209 "cluster_name": cfg.GetClusterName(),
10210 "cluster_tags": list(cluster_info.GetTags()),
10211 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10212 # we don't have job IDs
10214 iinfo = cfg.GetAllInstancesInfo().values()
10215 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10219 node_list = cfg.GetNodeList()
10221 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10222 hypervisor_name = self.hypervisor
10223 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10224 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10225 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10226 hypervisor_name = cluster_info.enabled_hypervisors[0]
10228 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10231 self.rpc.call_all_instances_info(node_list,
10232 cluster_info.enabled_hypervisors)
10233 for nname, nresult in node_data.items():
10234 # first fill in static (config-based) values
10235 ninfo = cfg.GetNodeInfo(nname)
10237 "tags": list(ninfo.GetTags()),
10238 "primary_ip": ninfo.primary_ip,
10239 "secondary_ip": ninfo.secondary_ip,
10240 "offline": ninfo.offline,
10241 "drained": ninfo.drained,
10242 "master_candidate": ninfo.master_candidate,
10245 if not (ninfo.offline or ninfo.drained):
10246 nresult.Raise("Can't get data for node %s" % nname)
10247 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10249 remote_info = nresult.payload
10251 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10252 'vg_size', 'vg_free', 'cpu_total']:
10253 if attr not in remote_info:
10254 raise errors.OpExecError("Node '%s' didn't return attribute"
10255 " '%s'" % (nname, attr))
10256 if not isinstance(remote_info[attr], int):
10257 raise errors.OpExecError("Node '%s' returned invalid value"
10259 (nname, attr, remote_info[attr]))
10260 # compute memory used by primary instances
10261 i_p_mem = i_p_up_mem = 0
10262 for iinfo, beinfo in i_list:
10263 if iinfo.primary_node == nname:
10264 i_p_mem += beinfo[constants.BE_MEMORY]
10265 if iinfo.name not in node_iinfo[nname].payload:
10268 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10269 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10270 remote_info['memory_free'] -= max(0, i_mem_diff)
10273 i_p_up_mem += beinfo[constants.BE_MEMORY]
10275 # compute memory used by instances
10277 "total_memory": remote_info['memory_total'],
10278 "reserved_memory": remote_info['memory_dom0'],
10279 "free_memory": remote_info['memory_free'],
10280 "total_disk": remote_info['vg_size'],
10281 "free_disk": remote_info['vg_free'],
10282 "total_cpus": remote_info['cpu_total'],
10283 "i_pri_memory": i_p_mem,
10284 "i_pri_up_memory": i_p_up_mem,
10286 pnr.update(pnr_dyn)
10288 node_results[nname] = pnr
10289 data["nodes"] = node_results
10293 for iinfo, beinfo in i_list:
10295 for nic in iinfo.nics:
10296 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10297 nic_dict = {"mac": nic.mac,
10299 "mode": filled_params[constants.NIC_MODE],
10300 "link": filled_params[constants.NIC_LINK],
10302 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10303 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10304 nic_data.append(nic_dict)
10306 "tags": list(iinfo.GetTags()),
10307 "admin_up": iinfo.admin_up,
10308 "vcpus": beinfo[constants.BE_VCPUS],
10309 "memory": beinfo[constants.BE_MEMORY],
10311 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10313 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10314 "disk_template": iinfo.disk_template,
10315 "hypervisor": iinfo.hypervisor,
10317 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10319 instance_data[iinfo.name] = pir
10321 data["instances"] = instance_data
10323 self.in_data = data
10325 def _AddNewInstance(self):
10326 """Add new instance data to allocator structure.
10328     This in combination with _ComputeClusterData will create the
10329 correct structure needed as input for the allocator.
10331     The checks for the completeness of the opcode must have already been done.
10335 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10337 if self.disk_template in constants.DTS_NET_MIRROR:
10338 self.required_nodes = 2
10340 self.required_nodes = 1
10343 "disk_template": self.disk_template,
10346 "vcpus": self.vcpus,
10347 "memory": self.mem_size,
10348 "disks": self.disks,
10349 "disk_space_total": disk_space,
10351 "required_nodes": self.required_nodes,
10355 def _AddRelocateInstance(self):
10356 """Add relocate instance data to allocator structure.
10358     This in combination with _ComputeClusterData will create the
10359 correct structure needed as input for the allocator.
10361     The checks for the completeness of the opcode must have already been done.
10365 instance = self.cfg.GetInstanceInfo(self.name)
10366 if instance is None:
10367 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10368 " IAllocator" % self.name)
10370 if instance.disk_template not in constants.DTS_NET_MIRROR:
10371 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10372 errors.ECODE_INVAL)
10374 if len(instance.secondary_nodes) != 1:
10375 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10376 errors.ECODE_STATE)
10378 self.required_nodes = 1
10379 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10380 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10384 "disk_space_total": disk_space,
10385 "required_nodes": self.required_nodes,
10386 "relocate_from": self.relocate_from,
10390 def _AddEvacuateNodes(self):
10391 """Add evacuate nodes data to allocator structure.
10395 "evac_nodes": self.evac_nodes
10399 def _BuildInputData(self, fn):
10400 """Build input data structures.
10403 self._ComputeClusterData()
10406 request["type"] = self.mode
10407 self.in_data["request"] = request
10409 self.in_text = serializer.Dump(self.in_data)
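  # Illustrative note (example only, not part of the original module): after
  # _BuildInputData runs, self.in_text holds the serialized form of
  # self.in_data, i.e. the cluster data gathered by _ComputeClusterData plus a
  # mode-specific "request" section, roughly:
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...},
  #    "request": {"type": <mode>, ...}}
  # The exact request keys depend on which _Add*() method built it.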
10411 def Run(self, name, validate=True, call_fn=None):
10412 """Run an instance allocator and return the results.
10415 if call_fn is None:
10416 call_fn = self.rpc.call_iallocator_runner
10418 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10419 result.Raise("Failure while running the iallocator script")
10421 self.out_text = result.payload
10423 self._ValidateResult()
10425 def _ValidateResult(self):
10426 """Process the allocator results.
10428     This will process and, if successful, save the result in
10429 self.out_data and the other parameters.
10433 rdict = serializer.Load(self.out_text)
10434 except Exception, err:
10435 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10437 if not isinstance(rdict, dict):
10438 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10440     # TODO: remove backwards compatibility in later versions
10441 if "nodes" in rdict and "result" not in rdict:
10442 rdict["result"] = rdict["nodes"]
10445 for key in "success", "info", "result":
10446 if key not in rdict:
10447 raise errors.OpExecError("Can't parse iallocator results:"
10448 " missing key '%s'" % key)
10449 setattr(self, key, rdict[key])
10451 if not isinstance(rdict["result"], list):
10452 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10454 self.out_data = rdict
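# Illustrative sketch (example only, not part of the original module): a
# minimal reply that would pass _ValidateResult above -- it must deserialize to
# a dict carrying the "success", "info" and "result" keys, with "result" being
# a list ("nodes" is still accepted as a legacy alias for "result"). The
# function name and node names are placeholders.
def _ExampleIAllocatorReply():
  """Example only: builds a serialized reply in the format checked above."""
  return serializer.Dump({
    "success": True,
    "info": "example reply",
    "result": ["node1.example.com", "node2.example.com"],
    })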
10457 class LUTestAllocator(NoHooksLU):
10458 """Run allocator tests.
10460 This LU runs the allocator tests
10464 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10465 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10466 ("name", _NoDefault, _TNonEmptyString),
10467 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10468 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10469 _TOr(_TNone, _TNonEmptyString))))),
10470 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10471 ("hypervisor", None, _TMaybeString),
10472 ("allocator", None, _TMaybeString),
10473 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10474 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10475 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10476 ("os", None, _TMaybeString),
10477 ("disk_template", None, _TMaybeString),
10478 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10481 def CheckPrereq(self):
10482 """Check prerequisites.
10484     This checks the opcode parameters depending on the direction and mode of the test.
10487 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10488 for attr in ["mem_size", "disks", "disk_template",
10489 "os", "tags", "nics", "vcpus"]:
10490 if not hasattr(self.op, attr):
10491 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10492 attr, errors.ECODE_INVAL)
10493 iname = self.cfg.ExpandInstanceName(self.op.name)
10494 if iname is not None:
10495 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10496 iname, errors.ECODE_EXISTS)
10497 if not isinstance(self.op.nics, list):
10498 raise errors.OpPrereqError("Invalid parameter 'nics'",
10499 errors.ECODE_INVAL)
10500 if not isinstance(self.op.disks, list):
10501 raise errors.OpPrereqError("Invalid parameter 'disks'",
10502 errors.ECODE_INVAL)
10503 for row in self.op.disks:
10504 if (not isinstance(row, dict) or
10505 "size" not in row or
10506 not isinstance(row["size"], int) or
10507 "mode" not in row or
10508 row["mode"] not in ['r', 'w']):
10509 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10510 " parameter", errors.ECODE_INVAL)
10511 if self.op.hypervisor is None:
10512 self.op.hypervisor = self.cfg.GetHypervisorType()
10513 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10514 fname = _ExpandInstanceName(self.cfg, self.op.name)
10515 self.op.name = fname
10516 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10517 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10518 if not hasattr(self.op, "evac_nodes"):
10519 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10520 " opcode input", errors.ECODE_INVAL)
10522 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10523 self.op.mode, errors.ECODE_INVAL)
10525 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10526 if self.op.allocator is None:
10527 raise errors.OpPrereqError("Missing allocator name",
10528 errors.ECODE_INVAL)
10529 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10530 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10531 self.op.direction, errors.ECODE_INVAL)
10533 def Exec(self, feedback_fn):
10534 """Run the allocator test.
10537 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10538 ial = IAllocator(self.cfg, self.rpc,
10541 mem_size=self.op.mem_size,
10542 disks=self.op.disks,
10543 disk_template=self.op.disk_template,
10547 vcpus=self.op.vcpus,
10548 hypervisor=self.op.hypervisor,
10550 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10551 ial = IAllocator(self.cfg, self.rpc,
10554 relocate_from=list(self.relocate_from),
10556 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10557 ial = IAllocator(self.cfg, self.rpc,
10559 evac_nodes=self.op.evac_nodes)
10561 raise errors.ProgrammerError("Uncatched mode %s in"
10562 " LUTestAllocator.Exec", self.op.mode)
10564 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10565 result = ial.in_text
10567 ial.Run(self.op.allocator, validate=False)
10568 result = ial.out_text