# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcode.OpCode}
94 @param jobs: A list of lists of opcode objects
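
# Usage sketch (illustrative): an LU's Exec method can queue follow-up work by
# returning a ResultWithJobs; each inner list becomes one submitted job, e.g.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name=name)]
#             for name in stopped_instances]
#     return ResultWithJobs(jobs)
#
# "stopped_instances" is a hypothetical list of instance names built by the LU.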


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values; rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an empty
      list should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
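
  # Usage sketch (illustrative, hypothetical LU): an instance-level LU that
  # only needs the instance lock can simply do
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #
  # Node locks, if needed, are typically added afterwards; see the sketch
  # following _LockInstancesNodes below.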

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
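
  # Combined sketch (illustrative, hypothetical LU): the typical pairing of
  # ExpandNames and DeclareLocks for an LU working on one instance and its
  # nodes looks like
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()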


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
442 """Tasklet base class.
444 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
445 they can mix legacy code with tasklets. Locking needs to be done in the LU,
446 tasklets know nothing about locks.
448 Subclasses must follow these rules:
449 - Implement CheckPrereq
453 def __init__(self, lu):
460 def CheckPrereq(self):
461 """Check prerequisites for this tasklets.
463 This method should check whether the prerequisites for the execution of
464 this tasklet are fulfilled. It can do internode communication, but it
465 should be idempotent - no cluster or system changes are allowed.
467 The method should raise errors.OpPrereqError in case something is not
468 fulfilled. Its return value is ignored.
470 This method should also update all parameters to their canonical form if it
471 hasn't been done before.
476 def Exec(self, feedback_fn):
477 """Execute the tasklet.
479 This method should implement the actual work. It should raise
480 errors.OpExecError for failures that are somewhat dealt with in code, or
484 raise NotImplementedError
488 """Base for query utility classes.
491 #: Attribute holding field definitions
497 def __init__(self, qfilter, fields, use_locking):
498 """Initializes this class.
501 self.use_locking = use_locking
503 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 namefield=self.SORT_FIELD)
505 self.requested_data = self.query.RequestedData()
506 self.names = self.query.RequestedNames()
508 # Sort only if no names were requested
509 self.sort_by_name = not self.names
511 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
580 """Returns a dict declaring all lock levels shared.
583 return dict.fromkeys(locking.LEVELS, 1)


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
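
# Worked example (illustrative values): with the defaults, an entry set to
# constants.VALUE_DEFAULT removes the key so the cluster-level default applies
# again, while other entries simply overwrite:
#
#   _GetUpdatedParams({"acpi": False, "kernel_path": "/boot/vmlinuz"},
#                     {"acpi": True, "kernel_path": constants.VALUE_DEFAULT})
#   # -> {"acpi": True}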


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
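
# Usage sketch (illustrative): once an LU has narrowed its work down to a
# single node it can drop the node locks it no longer needs, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# Passing names= instead of keep= releases exactly the named locks.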


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
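
# Worked example (illustrative numbers): assuming an ipolicy whose minimum and
# maximum for the given spec name are 128 and 32768, a value outside the range
# yields a message while a value inside it yields None:
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 64)
#   # -> "<name> value 64 is not in range [128, 32768]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
#   # -> None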


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
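
# Usage sketch (illustrative condition) from within a verify LU's Exec:
#
#   self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
#                 "Communication failure in hooks execution: %s", msg)
#
# Positional arguments after the item are merged into the message with the
# usual %-formatting by _Error; "test", "node_name" and "msg" are assumed to
# be defined by the caller.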


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1925 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1926 """Verifies the cluster config.
1931 def _VerifyHVP(self, hvp_data):
1932 """Verifies locally the syntax of the hypervisor parameters.
1935 for item, hv_name, hv_params in hvp_data:
1936 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1939 hv_class = hypervisor.GetHypervisor(hv_name)
1940 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1941 hv_class.CheckParameterSyntax(hv_params)
1942 except errors.GenericError, err:
1943 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1945 def ExpandNames(self):
1946 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1947 self.share_locks = _ShareAll()
1949 def CheckPrereq(self):
1950 """Check prerequisites.
1953 # Retrieve all information
1954 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1955 self.all_node_info = self.cfg.GetAllNodesInfo()
1956 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1958 def Exec(self, feedback_fn):
1959 """Verify integrity of cluster, performing various test on nodes.
1963 self._feedback_fn = feedback_fn
1965 feedback_fn("* Verifying cluster config")
1967 for msg in self.cfg.VerifyConfig():
1968 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1970 feedback_fn("* Verifying cluster certificate files")
1972 for cert_filename in pathutils.ALL_CERT_FILES:
1973 (errcode, msg) = _VerifyCertificate(cert_filename)
1974 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1976 feedback_fn("* Verifying hypervisor parameters")
1978 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1979 self.all_inst_info.values()))
1981 feedback_fn("* Verifying all nodes belong to an existing group")
1983 # We do this verification here because, should this bogus circumstance
1984 # occur, it would never be caught by VerifyGroup, which only acts on
1985 # nodes/instances reachable from existing node groups.
1987 dangling_nodes = set(node.name for node in self.all_node_info.values()
1988 if node.group not in self.all_group_info)
1990 dangling_instances = {}
1991 no_node_instances = []
1993 for inst in self.all_inst_info.values():
1994 if inst.primary_node in dangling_nodes:
1995 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1996 elif inst.primary_node not in self.all_node_info:
1997 no_node_instances.append(inst.name)
2002 utils.CommaJoin(dangling_instances.get(node.name,
2004 for node in dangling_nodes]
2006 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2008 "the following nodes (and their instances) belong to a non"
2009 " existing group: %s", utils.CommaJoin(pretty_dangling))
2011 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2013 "the following instances have a non-existing primary-node:"
2014 " %s", utils.CommaJoin(no_node_instances))
2019 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2020 """Verifies the status of a node group.
2023 HPATH = "cluster-verify"
2024 HTYPE = constants.HTYPE_CLUSTER
2027 _HOOKS_INDENT_RE = re.compile("^", re.M)
2029 class NodeImage(object):
2030 """A class representing the logical and physical status of a node.
2033 @ivar name: the node name to which this object refers
2034 @ivar volumes: a structure as returned from
2035 L{ganeti.backend.GetVolumeList} (runtime)
2036 @ivar instances: a list of running instances (runtime)
2037 @ivar pinst: list of configured primary instances (config)
2038 @ivar sinst: list of configured secondary instances (config)
2039 @ivar sbp: dictionary of {primary-node: list of instances} for all
2040 instances for which this node is secondary (config)
2041 @ivar mfree: free memory, as reported by hypervisor (runtime)
2042 @ivar dfree: free disk, as reported by the node (runtime)
2043 @ivar offline: the offline status (config)
2044 @type rpc_fail: boolean
2045 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2046 not whether the individual keys were correct) (runtime)
2047 @type lvm_fail: boolean
2048 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2049 @type hyp_fail: boolean
2050 @ivar hyp_fail: whether the RPC call didn't return the instance list
2051 @type ghost: boolean
2052 @ivar ghost: whether this node is unknown to the configuration (config)
2053 @type os_fail: boolean
2054 @ivar os_fail: whether the RPC call didn't return valid OS data
2056 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2057 @type vm_capable: boolean
2058 @ivar vm_capable: whether the node can host instances
2061 def __init__(self, offline=False, name=None, vm_capable=True):
2070 self.offline = offline
2071 self.vm_capable = vm_capable
2072 self.rpc_fail = False
2073 self.lvm_fail = False
2074 self.hyp_fail = False
2076 self.os_fail = False
2079 def ExpandNames(self):
2080 # This raises errors.OpPrereqError on its own:
2081 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2083 # Get instances in node group; this is unsafe and needs verification later
2085 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2087 self.needed_locks = {
2088 locking.LEVEL_INSTANCE: inst_names,
2089 locking.LEVEL_NODEGROUP: [self.group_uuid],
2090 locking.LEVEL_NODE: [],
2093 self.share_locks = _ShareAll()
2095 def DeclareLocks(self, level):
2096 if level == locking.LEVEL_NODE:
2097 # Get members of node group; this is unsafe and needs verification later
2098 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2100 all_inst_info = self.cfg.GetAllInstancesInfo()
2102 # In Exec(), we warn about mirrored instances that have primary and
2103 # secondary living in separate node groups. To fully verify that
2104 # volumes for these instances are healthy, we will need to do an
2105 # extra call to their secondaries. We ensure here those nodes will be locked.
2107 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2108 # Important: access only the instances whose lock is owned
2109 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2110 nodes.update(all_inst_info[inst].secondary_nodes)
2112 self.needed_locks[locking.LEVEL_NODE] = nodes
2114 def CheckPrereq(self):
2115 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2116 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2118 group_nodes = set(self.group_info.members)
2120 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2123 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2125 unlocked_instances = \
2126 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2129 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2130 utils.CommaJoin(unlocked_nodes),
2133 if unlocked_instances:
2134 raise errors.OpPrereqError("Missing lock for instances: %s" %
2135 utils.CommaJoin(unlocked_instances),
2138 self.all_node_info = self.cfg.GetAllNodesInfo()
2139 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2141 self.my_node_names = utils.NiceSort(group_nodes)
2142 self.my_inst_names = utils.NiceSort(group_instances)
2144 self.my_node_info = dict((name, self.all_node_info[name])
2145 for name in self.my_node_names)
2147 self.my_inst_info = dict((name, self.all_inst_info[name])
2148 for name in self.my_inst_names)
2150 # We detect here the nodes that will need the extra RPC calls for verifying
2151 # split LV volumes; they should be locked.
2152 extra_lv_nodes = set()
2154 for inst in self.my_inst_info.values():
2155 if inst.disk_template in constants.DTS_INT_MIRROR:
2156 for nname in inst.all_nodes:
2157 if self.all_node_info[nname].group != self.group_uuid:
2158 extra_lv_nodes.add(nname)
2160 unlocked_lv_nodes = \
2161 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2163 if unlocked_lv_nodes:
2164 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2165 utils.CommaJoin(unlocked_lv_nodes),
2167 self.extra_lv_nodes = list(extra_lv_nodes)
2169 def _VerifyNode(self, ninfo, nresult):
2170 """Perform some basic validation on data returned from a node.
2172 - check the result data structure is well formed and has all the mandatory fields
2174 - check ganeti version
2176 @type ninfo: L{objects.Node}
2177 @param ninfo: the node to check
2178 @param nresult: the results from the node
2180 @return: whether overall this call was successful (and we can expect
2181 reasonable values in the response)
2185 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2187 # main result, nresult should be a non-empty dict
2188 test = not nresult or not isinstance(nresult, dict)
2189 _ErrorIf(test, constants.CV_ENODERPC, node,
2190 "unable to verify node: no data returned")
2194 # compares ganeti version
2195 local_version = constants.PROTOCOL_VERSION
2196 remote_version = nresult.get("version", None)
2197 test = not (remote_version and
2198 isinstance(remote_version, (list, tuple)) and
2199 len(remote_version) == 2)
2200 _ErrorIf(test, constants.CV_ENODERPC, node,
2201 "connection to node returned invalid data")
2205 test = local_version != remote_version[0]
2206 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2207 "incompatible protocol versions: master %s,"
2208 " node %s", local_version, remote_version[0])
2212 # node seems compatible, we can actually try to look into its results
2214 # full package version
2215 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2216 constants.CV_ENODEVERSION, node,
2217 "software version mismatch: master %s, node %s",
2218 constants.RELEASE_VERSION, remote_version[1],
2219 code=self.ETYPE_WARNING)
2221 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2222 if ninfo.vm_capable and isinstance(hyp_result, dict):
2223 for hv_name, hv_result in hyp_result.iteritems():
2224 test = hv_result is not None
2225 _ErrorIf(test, constants.CV_ENODEHV, node,
2226 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2228 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2229 if ninfo.vm_capable and isinstance(hvp_result, list):
2230 for item, hv_name, hv_result in hvp_result:
2231 _ErrorIf(True, constants.CV_ENODEHV, node,
2232 "hypervisor %s parameter verify failure (source %s): %s",
2233 hv_name, item, hv_result)
2235 test = nresult.get(constants.NV_NODESETUP,
2236 ["Missing NODESETUP results"])
2237 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2242 def _VerifyNodeTime(self, ninfo, nresult,
2243 nvinfo_starttime, nvinfo_endtime):
2244 """Check the node time.
2246 @type ninfo: L{objects.Node}
2247 @param ninfo: the node to check
2248 @param nresult: the remote results for the node
2249 @param nvinfo_starttime: the start time of the RPC call
2250 @param nvinfo_endtime: the end time of the RPC call
2254 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2256 ntime = nresult.get(constants.NV_TIME, None)
2258 ntime_merged = utils.MergeTime(ntime)
2259 except (ValueError, TypeError):
2260 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2263 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2264 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2265 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2266 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2270 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2271 "Node time diverges by at least %s from master node time",
2274 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2275 """Check the node LVM results.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param vg_name: the configured VG name
2287 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2289 # checks vg existence and size > 20G
2290 vglist = nresult.get(constants.NV_VGLIST, None)
2292 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2294 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2295 constants.MIN_VG_SIZE)
2296 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2299 pvlist = nresult.get(constants.NV_PVLIST, None)
2300 test = pvlist is None
2301 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2303 # check that ':' is not present in PV names, since it's a
2304 # special character for lvcreate (denotes the range of PEs to allocate on PVs)
2306 for _, pvname, owner_vg in pvlist:
2307 test = ":" in pvname
2308 _ErrorIf(test, constants.CV_ENODELVM, node,
2309 "Invalid character ':' in PV '%s' of VG '%s'",
2312 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2313 """Check the node bridges.
2315 @type ninfo: L{objects.Node}
2316 @param ninfo: the node to check
2317 @param nresult: the remote results for the node
2318 @param bridges: the expected list of bridges
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 missing = nresult.get(constants.NV_BRIDGES, None)
2328 test = not isinstance(missing, list)
2329 _ErrorIf(test, constants.CV_ENODENET, node,
2330 "did not return valid bridge information")
2332 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2333 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2335 def _VerifyNodeUserScripts(self, ninfo, nresult):
2336 """Check the results of user scripts presence and executability on the node
2338 @type ninfo: L{objects.Node}
2339 @param ninfo: the node to check
2340 @param nresult: the remote results for the node
2345 test = constants.NV_USERSCRIPTS not in nresult
2346 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2347 "did not return user scripts information")
2349 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2351 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2352 "user scripts not present or not executable: %s" %
2353 utils.CommaJoin(sorted(broken_scripts)))
2355 def _VerifyNodeNetwork(self, ninfo, nresult):
2356 """Check the node network connectivity results.
2358 @type ninfo: L{objects.Node}
2359 @param ninfo: the node to check
2360 @param nresult: the remote results for the node
2364 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2366 test = constants.NV_NODELIST not in nresult
2367 _ErrorIf(test, constants.CV_ENODESSH, node,
2368 "node hasn't returned node ssh connectivity data")
2370 if nresult[constants.NV_NODELIST]:
2371 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2372 _ErrorIf(True, constants.CV_ENODESSH, node,
2373 "ssh communication with node '%s': %s", a_node, a_msg)
2375 test = constants.NV_NODENETTEST not in nresult
2376 _ErrorIf(test, constants.CV_ENODENET, node,
2377 "node hasn't returned node tcp connectivity data")
2379 if nresult[constants.NV_NODENETTEST]:
2380 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2382 _ErrorIf(True, constants.CV_ENODENET, node,
2383 "tcp communication with node '%s': %s",
2384 anode, nresult[constants.NV_NODENETTEST][anode])
2386 test = constants.NV_MASTERIP not in nresult
2387 _ErrorIf(test, constants.CV_ENODENET, node,
2388 "node hasn't returned node master IP reachability data")
2390 if not nresult[constants.NV_MASTERIP]:
2391 if node == self.master_node:
2392 msg = "the master node cannot reach the master IP (not configured?)"
2394 msg = "cannot reach the master IP"
2395 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2397 def _VerifyInstance(self, instance, instanceconfig, node_image,
2399 """Verify an instance.
2401 This function checks whether the required block devices are
2402 available on the instance's node.
2405 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2406 node_current = instanceconfig.primary_node
2408 node_vol_should = {}
2409 instanceconfig.MapLVsByNode(node_vol_should)
2411 cluster = self.cfg.GetClusterInfo()
2412 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2414 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2415 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2417 for node in node_vol_should:
2418 n_img = node_image[node]
2419 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2420 # ignore missing volumes on offline or broken nodes
2422 for volume in node_vol_should[node]:
2423 test = volume not in n_img.volumes
2424 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2425 "volume %s missing on node %s", volume, node)
2427 if instanceconfig.admin_state == constants.ADMINST_UP:
2428 pri_img = node_image[node_current]
2429 test = instance not in pri_img.instances and not pri_img.offline
2430 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2431 "instance not running on its primary node %s",
2434 diskdata = [(nname, success, status, idx)
2435 for (nname, disks) in diskstatus.items()
2436 for idx, (success, status) in enumerate(disks)]
2438 for nname, success, bdev_status, idx in diskdata:
2439 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2441 snode = node_image[nname]
2442 bad_snode = snode.ghost or snode.offline
2443 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2444 not success and not bad_snode,
2445 constants.CV_EINSTANCEFAULTYDISK, instance,
2446 "couldn't retrieve status for disk/%s on %s: %s",
2447 idx, nname, bdev_status)
2448 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2449 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2450 constants.CV_EINSTANCEFAULTYDISK, instance,
2451 "disk/%s on %s is faulty", idx, nname)
2453 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2454 """Verify if there are any unknown volumes in the cluster.
2456 The .os, .swap and backup volumes are ignored. All other volumes are
2457 reported as unknown.
2459 @type reserved: L{ganeti.utils.FieldSet}
2460 @param reserved: a FieldSet of reserved volume names
2463 for node, n_img in node_image.items():
2464 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2465 self.all_node_info[node].group != self.group_uuid):
2466 # skip non-healthy nodes
2468 for volume in n_img.volumes:
2469 test = ((node not in node_vol_should or
2470 volume not in node_vol_should[node]) and
2471 not reserved.Matches(volume))
2472 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2473 "volume %s is unknown", volume)
2475 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2476 """Verify N+1 Memory Resilience.
2478 Check that if one single node dies we can still start all the
2479 instances it was primary for.
2482 cluster_info = self.cfg.GetClusterInfo()
2483 for node, n_img in node_image.items():
2484 # This code checks that every node which is now listed as
2485 # secondary has enough memory to host all the instances it would have
2486 # to run should a single other node in the cluster fail.
2487 # FIXME: not ready for failover to an arbitrary node
2488 # FIXME: does not support file-backed instances
2489 # WARNING: we currently take into account down instances as well
2490 # as up ones, considering that even if they're down someone
2491 # might want to start them even in the event of a node failure.
2492 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2493 # we're skipping nodes marked offline and nodes in other groups from
2494 # the N+1 warning, since most likely we don't have good memory
2495 # information from them; we already list instances living on such
2496 # nodes, and that's enough warning
2498 #TODO(dynmem): also consider ballooning out other instances
2499 for prinode, instances in n_img.sbp.items():
2501 for instance in instances:
2502 bep = cluster_info.FillBE(instance_cfg[instance])
2503 if bep[constants.BE_AUTO_BALANCE]:
2504 needed_mem += bep[constants.BE_MINMEM]
2505 test = n_img.mfree < needed_mem
2506 self._ErrorIf(test, constants.CV_ENODEN1, node,
2507 "not enough memory to accomodate instance failovers"
2508 " should node %s fail (%dMiB needed, %dMiB available)",
2509 prinode, needed_mem, n_img.mfree)
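# Illustrative sketch (hypothetical numbers): for every primary node whose
# instances list this node as secondary, the summed minimum memory of the
# auto-balanced instances must fit into this node's reported free memory.
free_mib = 4096                              # n_img.mfree
failover_minmem_mib = [1024, 2048, 1536]     # BE_MINMEM of auto-balanced instances
needed_mib = sum(failover_minmem_mib)        # 4608
n_plus_one_ok = free_mib >= needed_mib       # False -> CV_ENODEN1 would be raised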
2512 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2513 (files_all, files_opt, files_mc, files_vm)):
2514 """Verifies file checksums collected from all nodes.
2516 @param errorif: Callback for reporting errors
2517 @param nodeinfo: List of L{objects.Node} objects
2518 @param master_node: Name of master node
2519 @param all_nvinfo: RPC results
2522 # Define functions determining which nodes to consider for a file
2525 (files_mc, lambda node: (node.master_candidate or
2526 node.name == master_node)),
2527 (files_vm, lambda node: node.vm_capable),
2530 # Build mapping from filename to list of nodes which should have the file
2532 for (files, fn) in files2nodefn:
2534 filenodes = nodeinfo
2536 filenodes = filter(fn, nodeinfo)
2537 nodefiles.update((filename,
2538 frozenset(map(operator.attrgetter("name"), filenodes)))
2539 for filename in files)
2541 assert set(nodefiles) == (files_all | files_mc | files_vm)
2543 fileinfo = dict((filename, {}) for filename in nodefiles)
2544 ignore_nodes = set()
2546 for node in nodeinfo:
2548 ignore_nodes.add(node.name)
2551 nresult = all_nvinfo[node.name]
2553 if nresult.fail_msg or not nresult.payload:
2556 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2558 test = not (node_files and isinstance(node_files, dict))
2559 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2560 "Node did not return file checksum data")
2562 ignore_nodes.add(node.name)
2565 # Build per-checksum mapping from filename to nodes having it
2566 for (filename, checksum) in node_files.items():
2567 assert filename in nodefiles
2568 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2570 for (filename, checksums) in fileinfo.items():
2571 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2573 # Nodes having the file
2574 with_file = frozenset(node_name
2575 for nodes in fileinfo[filename].values()
2576 for node_name in nodes) - ignore_nodes
2578 expected_nodes = nodefiles[filename] - ignore_nodes
2580 # Nodes missing file
2581 missing_file = expected_nodes - with_file
2583 if filename in files_opt:
2585 errorif(missing_file and missing_file != expected_nodes,
2586 constants.CV_ECLUSTERFILECHECK, None,
2587 "File %s is optional, but it must exist on all or no"
2588 " nodes (not found on %s)",
2589 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2591 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2592 "File %s is missing from node(s) %s", filename,
2593 utils.CommaJoin(utils.NiceSort(missing_file)))
2595 # Warn if a node has a file it shouldn't
2596 unexpected = with_file - expected_nodes
2598 constants.CV_ECLUSTERFILECHECK, None,
2599 "File %s should not exist on node(s) %s",
2600 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2602 # See if there are multiple versions of the file
2603 test = len(checksums) > 1
2605 variants = ["variant %s on %s" %
2606 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2607 for (idx, (checksum, nodes)) in
2608 enumerate(sorted(checksums.items()))]
2612 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2613 "File %s found with %s different checksums (%s)",
2614 filename, len(checksums), "; ".join(variants))
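# Illustrative sketch (hypothetical checksums and node names): the fileinfo
# structure built above maps every tracked file to a dict of
# checksum -> set of nodes reporting it; more than one checksum key for a
# file means the file is out of sync across the cluster.
fileinfo_example = {
  "/var/lib/ganeti/config.data": {
    "0123456789abcdef": set(["node1.example.com"]),
    "fedcba9876543210": set(["node2.example.com", "node3.example.com"]),
  },
}
out_of_sync = [f for (f, sums) in fileinfo_example.items() if len(sums) > 1]
# -> ["/var/lib/ganeti/config.data"]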
2616 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2618 """Verifies and the node DRBD status.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param instanceinfo: the dict of instances
2624 @param drbd_helper: the configured DRBD usermode helper
2625 @param drbd_map: the DRBD map as returned by
2626 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2630 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2634 test = (helper_result is None)
2635 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636 "no drbd usermode helper returned")
2638 status, payload = helper_result
2640 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2641 "drbd usermode helper check unsuccessful: %s", payload)
2642 test = status and (payload != drbd_helper)
2643 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2644 "wrong drbd usermode helper: %s", payload)
2646 # compute the DRBD minors
2648 for minor, instance in drbd_map[node].items():
2649 test = instance not in instanceinfo
2650 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2651 "ghost instance '%s' in temporary DRBD map", instance)
2652 # ghost instance should not be running, but otherwise we
2653 # don't give double warnings (both ghost instance and
2654 # unallocated minor in use)
2656 node_drbd[minor] = (instance, False)
2658 instance = instanceinfo[instance]
2659 node_drbd[minor] = (instance.name,
2660 instance.admin_state == constants.ADMINST_UP)
2662 # and now check them
2663 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2664 test = not isinstance(used_minors, (tuple, list))
2665 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2666 "cannot parse drbd status file: %s", str(used_minors))
2668 # we cannot check drbd status
2671 for minor, (iname, must_exist) in node_drbd.items():
2672 test = minor not in used_minors and must_exist
2673 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2674 "drbd minor %d of instance %s is not active", minor, iname)
2675 for minor in used_minors:
2676 test = minor not in node_drbd
2677 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2678 "unallocated drbd minor %d is in use", minor)
2680 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2681 """Builds the node OS structures.
2683 @type ninfo: L{objects.Node}
2684 @param ninfo: the node to check
2685 @param nresult: the remote results for the node
2686 @param nimg: the node image object
2690 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2692 remote_os = nresult.get(constants.NV_OSLIST, None)
2693 test = (not isinstance(remote_os, list) or
2694 not compat.all(isinstance(v, list) and len(v) == 7
2695 for v in remote_os))
2697 _ErrorIf(test, constants.CV_ENODEOS, node,
2698 "node hasn't returned valid OS data")
2707 for (name, os_path, status, diagnose,
2708 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2710 if name not in os_dict:
2713 # parameters is a list of lists instead of list of tuples due to
2714 # JSON lacking a real tuple type, fix it:
2715 parameters = [tuple(v) for v in parameters]
2716 os_dict[name].append((os_path, status, diagnose,
2717 set(variants), set(parameters), set(api_ver)))
2719 nimg.oslist = os_dict
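# Illustrative sketch (hypothetical OS data): the structure stored in
# nimg.oslist above, keyed by OS name.  Each entry keeps the install path,
# validity flag, diagnose message and the variant/parameter/API-version sets.
oslist_example = {
  "debootstrap": [
    ("/srv/ganeti/os/debootstrap", True, "",
     set(["default", "minimal"]), set([("ARG", "argument description")]), set([20])),
  ],
}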
2721 def _VerifyNodeOS(self, ninfo, nimg, base):
2722 """Verifies the node OS list.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nimg: the node image object
2727 @param base: the 'template' node we match against (e.g. from the master)
2731 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2733 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2735 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2736 for os_name, os_data in nimg.oslist.items():
2737 assert os_data, "Empty OS status for OS %s?!" % os_name
2738 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2739 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2740 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2741 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2742 "OS '%s' has multiple entries (first one shadows the rest): %s",
2743 os_name, utils.CommaJoin([v[0] for v in os_data]))
2744 # comparisons with the 'base' image
2745 test = os_name not in base.oslist
2746 _ErrorIf(test, constants.CV_ENODEOS, node,
2747 "Extra OS %s not present on reference node (%s)",
2751 assert base.oslist[os_name], "Base node has empty OS status?"
2752 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2754 # base OS is invalid, skipping
2756 for kind, a, b in [("API version", f_api, b_api),
2757 ("variants list", f_var, b_var),
2758 ("parameters", beautify_params(f_param),
2759 beautify_params(b_param))]:
2760 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2761 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2762 kind, os_name, base.name,
2763 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2765 # check any missing OSes
2766 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2767 _ErrorIf(missing, constants.CV_ENODEOS, node,
2768 "OSes present on reference node %s but missing on this node: %s",
2769 base.name, utils.CommaJoin(missing))
2771 def _VerifyOob(self, ninfo, nresult):
2772 """Verifies out of band functionality of a node.
2774 @type ninfo: L{objects.Node}
2775 @param ninfo: the node to check
2776 @param nresult: the remote results for the node
2780 # We just have to verify the paths on master and/or master candidates
2781 # as the oob helper is invoked on the master
2782 if ((ninfo.master_candidate or ninfo.master_capable) and
2783 constants.NV_OOB_PATHS in nresult):
2784 for path_result in nresult[constants.NV_OOB_PATHS]:
2785 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2787 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2788 """Verifies and updates the node volume data.
2790 This function will update a L{NodeImage}'s internal structures
2791 with data from the remote call.
2793 @type ninfo: L{objects.Node}
2794 @param ninfo: the node to check
2795 @param nresult: the remote results for the node
2796 @param nimg: the node image object
2797 @param vg_name: the configured VG name
2801 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2803 nimg.lvm_fail = True
2804 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2807 elif isinstance(lvdata, basestring):
2808 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2809 utils.SafeEncode(lvdata))
2810 elif not isinstance(lvdata, dict):
2811 _ErrorIf(True, constants.CV_ENODELVM, node,
2812 "rpc call to node failed (lvlist)")
2814 nimg.volumes = lvdata
2815 nimg.lvm_fail = False
2817 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2818 """Verifies and updates the node instance list.
2820 If the listing was successful, then updates this node's instance
2821 list. Otherwise, it marks the RPC call as failed for the instance list.
2824 @type ninfo: L{objects.Node}
2825 @param ninfo: the node to check
2826 @param nresult: the remote results for the node
2827 @param nimg: the node image object
2830 idata = nresult.get(constants.NV_INSTANCELIST, None)
2831 test = not isinstance(idata, list)
2832 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2833 "rpc call to node failed (instancelist): %s",
2834 utils.SafeEncode(str(idata)))
2836 nimg.hyp_fail = True
2838 nimg.instances = idata
2840 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2841 """Verifies and computes a node information map
2843 @type ninfo: L{objects.Node}
2844 @param ninfo: the node to check
2845 @param nresult: the remote results for the node
2846 @param nimg: the node image object
2847 @param vg_name: the configured VG name
2851 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2853 # try to read free memory (from the hypervisor)
2854 hv_info = nresult.get(constants.NV_HVINFO, None)
2855 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2856 _ErrorIf(test, constants.CV_ENODEHV, node,
2857 "rpc call to node failed (hvinfo)")
2860 nimg.mfree = int(hv_info["memory_free"])
2861 except (ValueError, TypeError):
2862 _ErrorIf(True, constants.CV_ENODERPC, node,
2863 "node returned invalid nodeinfo, check hypervisor")
2865 # FIXME: devise a free space model for file based instances as well
2866 if vg_name is not None:
2867 test = (constants.NV_VGLIST not in nresult or
2868 vg_name not in nresult[constants.NV_VGLIST])
2869 _ErrorIf(test, constants.CV_ENODELVM, node,
2870 "node didn't return data for the volume group '%s'"
2871 " - it is either missing or broken", vg_name)
2874 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2875 except (ValueError, TypeError):
2876 _ErrorIf(True, constants.CV_ENODERPC, node,
2877 "node returned invalid LVM info, check LVM status")
2879 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2880 """Gets per-disk status information for all instances.
2882 @type nodelist: list of strings
2883 @param nodelist: Node names
2884 @type node_image: dict of (name, L{objects.Node})
2885 @param node_image: Node objects
2886 @type instanceinfo: dict of (name, L{objects.Instance})
2887 @param instanceinfo: Instance objects
2888 @rtype: {instance: {node: [(success, payload)]}}
2889 @return: a dictionary of per-instance dictionaries with nodes as
2890 keys and disk information as values; the disk information is a
2891 list of tuples (success, payload)
2894 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2897 node_disks_devonly = {}
2898 diskless_instances = set()
2899 diskless = constants.DT_DISKLESS
2901 for nname in nodelist:
2902 node_instances = list(itertools.chain(node_image[nname].pinst,
2903 node_image[nname].sinst))
2904 diskless_instances.update(inst for inst in node_instances
2905 if instanceinfo[inst].disk_template == diskless)
2906 disks = [(inst, disk)
2907 for inst in node_instances
2908 for disk in instanceinfo[inst].disks]
2911 # No need to collect data
2914 node_disks[nname] = disks
2916 # _AnnotateDiskParams already makes copies of the disks
2918 for (inst, dev) in disks:
2919 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2920 self.cfg.SetDiskID(anno_disk, nname)
2921 devonly.append(anno_disk)
2923 node_disks_devonly[nname] = devonly
2925 assert len(node_disks) == len(node_disks_devonly)
2927 # Collect data from all nodes with disks
2928 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2931 assert len(result) == len(node_disks)
2935 for (nname, nres) in result.items():
2936 disks = node_disks[nname]
2939 # No data from this node
2940 data = len(disks) * [(False, "node offline")]
2943 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2944 "while getting disk information: %s", msg)
2946 # No data from this node
2947 data = len(disks) * [(False, msg)]
2950 for idx, i in enumerate(nres.payload):
2951 if isinstance(i, (tuple, list)) and len(i) == 2:
2954 logging.warning("Invalid result from node %s, entry %d: %s",
2956 data.append((False, "Invalid result from the remote node"))
2958 for ((inst, _), status) in zip(disks, data):
2959 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2961 # Add empty entries for diskless instances.
2962 for inst in diskless_instances:
2963 assert inst not in instdisk
2966 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2967 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2968 compat.all(isinstance(s, (tuple, list)) and
2969 len(s) == 2 for s in statuses)
2970 for inst, nnames in instdisk.items()
2971 for nname, statuses in nnames.items())
2972 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
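# Illustrative sketch (hypothetical values): the mapping assembled above --
# each instance maps to a per-node list holding one (success, payload) tuple
# per disk, in disk-index order.
instdisk_example = {
  "inst1.example.com": {
    "node1.example.com": [(True, "<blockdev status>"), (False, "node offline")],
  },
}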
2977 def _SshNodeSelector(group_uuid, all_nodes):
2978 """Create endless iterators for all potential SSH check hosts.
2981 nodes = [node for node in all_nodes
2982 if (node.group != group_uuid and
2984 keyfunc = operator.attrgetter("group")
2986 return map(itertools.cycle,
2987 [sorted(map(operator.attrgetter("name"), names))
2988 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2992 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2993 """Choose which nodes should talk to which other nodes.
2995 We will make nodes contact all nodes in their group, and one node from every other node group.
2998 @warning: This algorithm has a known issue if one node group is much
2999 smaller than others (e.g. just one node). In such a case all other
3000 nodes will talk to the single node.
3003 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3004 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3006 return (online_nodes,
3007 dict((name, sorted([i.next() for i in sel]))
3008 for name in online_nodes))
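# Illustrative sketch (hypothetical node and group names): every verified node
# also talks to one node from each other group, and itertools.cycle rotates
# which member of that group is picked, so the load is spread around.
import itertools
other_groups = {"grp-a": ["a1", "a2"], "grp-b": ["b1"]}
selectors = [itertools.cycle(sorted(names)) for names in other_groups.values()]
targets = dict((name, sorted(sel.next() for sel in selectors))
               for name in ["n1", "n2", "n3"])
# -> {"n1": ["a1", "b1"], "n2": ["a2", "b1"], "n3": ["a1", "b1"]}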
3010 def BuildHooksEnv(self):
3013 Cluster-Verify hooks are run only in the post phase; if they fail, their
3014 output is logged in the verify output and the verification fails.
3018 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3021 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3022 for node in self.my_node_info.values())
3026 def BuildHooksNodes(self):
3027 """Build hooks nodes.
3030 return ([], self.my_node_names)
3032 def Exec(self, feedback_fn):
3033 """Verify integrity of the node group, performing various test on nodes.
3036 # This method has too many local variables. pylint: disable=R0914
3037 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3039 if not self.my_node_names:
3041 feedback_fn("* Empty node group, skipping verification")
3045 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3046 verbose = self.op.verbose
3047 self._feedback_fn = feedback_fn
3049 vg_name = self.cfg.GetVGName()
3050 drbd_helper = self.cfg.GetDRBDHelper()
3051 cluster = self.cfg.GetClusterInfo()
3052 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3053 hypervisors = cluster.enabled_hypervisors
3054 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3056 i_non_redundant = [] # Non redundant instances
3057 i_non_a_balanced = [] # Non auto-balanced instances
3058 i_offline = 0 # Count of offline instances
3059 n_offline = 0 # Count of offline nodes
3060 n_drained = 0 # Count of nodes being drained
3061 node_vol_should = {}
3063 # FIXME: verify OS list
3066 filemap = _ComputeAncillaryFiles(cluster, False)
3068 # do local checksums
3069 master_node = self.master_node = self.cfg.GetMasterNode()
3070 master_ip = self.cfg.GetMasterIP()
3072 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3075 if self.cfg.GetUseExternalMipScript():
3076 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3078 node_verify_param = {
3079 constants.NV_FILELIST:
3080 utils.UniqueSequence(filename
3081 for files in filemap
3082 for filename in files),
3083 constants.NV_NODELIST:
3084 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3085 self.all_node_info.values()),
3086 constants.NV_HYPERVISOR: hypervisors,
3087 constants.NV_HVPARAMS:
3088 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3089 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3090 for node in node_data_list
3091 if not node.offline],
3092 constants.NV_INSTANCELIST: hypervisors,
3093 constants.NV_VERSION: None,
3094 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3095 constants.NV_NODESETUP: None,
3096 constants.NV_TIME: None,
3097 constants.NV_MASTERIP: (master_node, master_ip),
3098 constants.NV_OSLIST: None,
3099 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3100 constants.NV_USERSCRIPTS: user_scripts,
3103 if vg_name is not None:
3104 node_verify_param[constants.NV_VGLIST] = None
3105 node_verify_param[constants.NV_LVLIST] = vg_name
3106 node_verify_param[constants.NV_PVLIST] = [vg_name]
3109 node_verify_param[constants.NV_DRBDLIST] = None
3110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3113 # FIXME: this needs to be changed per node-group, not cluster-wide
3115 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3116 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117 bridges.add(default_nicpp[constants.NIC_LINK])
3118 for instance in self.my_inst_info.values():
3119 for nic in instance.nics:
3120 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3121 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3122 bridges.add(full_nic[constants.NIC_LINK])
3125 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3127 # Build our expected cluster state
3128 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3130 vm_capable=node.vm_capable))
3131 for node in node_data_list)
3135 for node in self.all_node_info.values():
3136 path = _SupportsOob(self.cfg, node)
3137 if path and path not in oob_paths:
3138 oob_paths.append(path)
3141 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3143 for instance in self.my_inst_names:
3144 inst_config = self.my_inst_info[instance]
3145 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3148 for nname in inst_config.all_nodes:
3149 if nname not in node_image:
3150 gnode = self.NodeImage(name=nname)
3151 gnode.ghost = (nname not in self.all_node_info)
3152 node_image[nname] = gnode
3154 inst_config.MapLVsByNode(node_vol_should)
3156 pnode = inst_config.primary_node
3157 node_image[pnode].pinst.append(instance)
3159 for snode in inst_config.secondary_nodes:
3160 nimg = node_image[snode]
3161 nimg.sinst.append(instance)
3162 if pnode not in nimg.sbp:
3163 nimg.sbp[pnode] = []
3164 nimg.sbp[pnode].append(instance)
3166 # At this point, we have the in-memory data structures complete,
3167 # except for the runtime information, which we'll gather next
3169 # Due to the way our RPC system works, exact response times cannot be
3170 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3171 # time before and after executing the request, we can at least have a time window.
3173 nvinfo_starttime = time.time()
3174 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3176 self.cfg.GetClusterName())
3177 nvinfo_endtime = time.time()
3179 if self.extra_lv_nodes and vg_name is not None:
3181 self.rpc.call_node_verify(self.extra_lv_nodes,
3182 {constants.NV_LVLIST: vg_name},
3183 self.cfg.GetClusterName())
3185 extra_lv_nvinfo = {}
3187 all_drbd_map = self.cfg.ComputeDRBDMap()
3189 feedback_fn("* Gathering disk information (%s nodes)" %
3190 len(self.my_node_names))
3191 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3194 feedback_fn("* Verifying configuration file consistency")
3196 # If not all nodes are being checked, we need to make sure the master node
3197 # and a non-checked vm_capable node are in the list.
3198 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3200 vf_nvinfo = all_nvinfo.copy()
3201 vf_node_info = list(self.my_node_info.values())
3202 additional_nodes = []
3203 if master_node not in self.my_node_info:
3204 additional_nodes.append(master_node)
3205 vf_node_info.append(self.all_node_info[master_node])
3206 # Add the first vm_capable node we find which is not included,
3207 # excluding the master node (which we already have)
3208 for node in absent_nodes:
3209 nodeinfo = self.all_node_info[node]
3210 if (nodeinfo.vm_capable and not nodeinfo.offline and
3211 node != master_node):
3212 additional_nodes.append(node)
3213 vf_node_info.append(self.all_node_info[node])
3215 key = constants.NV_FILELIST
3216 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3217 {key: node_verify_param[key]},
3218 self.cfg.GetClusterName()))
3220 vf_nvinfo = all_nvinfo
3221 vf_node_info = self.my_node_info.values()
3223 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3225 feedback_fn("* Verifying node status")
3229 for node_i in node_data_list:
3231 nimg = node_image[node]
3235 feedback_fn("* Skipping offline node %s" % (node,))
3239 if node == master_node:
3241 elif node_i.master_candidate:
3242 ntype = "master candidate"
3243 elif node_i.drained:
3249 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3251 msg = all_nvinfo[node].fail_msg
3252 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3255 nimg.rpc_fail = True
3258 nresult = all_nvinfo[node].payload
3260 nimg.call_ok = self._VerifyNode(node_i, nresult)
3261 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3262 self._VerifyNodeNetwork(node_i, nresult)
3263 self._VerifyNodeUserScripts(node_i, nresult)
3264 self._VerifyOob(node_i, nresult)
3267 self._VerifyNodeLVM(node_i, nresult, vg_name)
3268 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3271 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3272 self._UpdateNodeInstances(node_i, nresult, nimg)
3273 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3274 self._UpdateNodeOS(node_i, nresult, nimg)
3276 if not nimg.os_fail:
3277 if refos_img is None:
3279 self._VerifyNodeOS(node_i, nimg, refos_img)
3280 self._VerifyNodeBridges(node_i, nresult, bridges)
3282 # Check whether all running instances are primary for the node. (This
3283 # can no longer be done from _VerifyInstance below, since some of the
3284 # wrong instances could be from other node groups.)
3285 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3287 for inst in non_primary_inst:
3288 test = inst in self.all_inst_info
3289 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3290 "instance should not run on node %s", node_i.name)
3291 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3292 "node is running unknown instance %s", inst)
3294 for node, result in extra_lv_nvinfo.items():
3295 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3296 node_image[node], vg_name)
3298 feedback_fn("* Verifying instance status")
3299 for instance in self.my_inst_names:
3301 feedback_fn("* Verifying instance %s" % instance)
3302 inst_config = self.my_inst_info[instance]
3303 self._VerifyInstance(instance, inst_config, node_image,
3305 inst_nodes_offline = []
3307 pnode = inst_config.primary_node
3308 pnode_img = node_image[pnode]
3309 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3310 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3311 " primary node failed", instance)
3313 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3315 constants.CV_EINSTANCEBADNODE, instance,
3316 "instance is marked as running and lives on offline node %s",
3317 inst_config.primary_node)
3319 # If the instance is non-redundant we cannot survive losing its primary
3320 # node, so we are not N+1 compliant. On the other hand we have no disk
3321 templates with more than one secondary so that situation is not well supported either.
3323 # FIXME: does not support file-backed instances
3324 if not inst_config.secondary_nodes:
3325 i_non_redundant.append(instance)
3327 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3328 constants.CV_EINSTANCELAYOUT,
3329 instance, "instance has multiple secondary nodes: %s",
3330 utils.CommaJoin(inst_config.secondary_nodes),
3331 code=self.ETYPE_WARNING)
3333 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3334 pnode = inst_config.primary_node
3335 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3336 instance_groups = {}
3338 for node in instance_nodes:
3339 instance_groups.setdefault(self.all_node_info[node].group,
3343 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3344 # Sort so that we always list the primary node first.
3345 for group, nodes in sorted(instance_groups.items(),
3346 key=lambda (_, nodes): pnode in nodes,
3349 self._ErrorIf(len(instance_groups) > 1,
3350 constants.CV_EINSTANCESPLITGROUPS,
3351 instance, "instance has primary and secondary nodes in"
3352 " different groups: %s", utils.CommaJoin(pretty_list),
3353 code=self.ETYPE_WARNING)
3355 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3356 i_non_a_balanced.append(instance)
3358 for snode in inst_config.secondary_nodes:
3359 s_img = node_image[snode]
3360 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3361 snode, "instance %s, connection to secondary node failed",
3365 inst_nodes_offline.append(snode)
3367 # warn that the instance lives on offline nodes
3368 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3369 "instance has offline secondary node(s) %s",
3370 utils.CommaJoin(inst_nodes_offline))
3371 # ... or ghost/non-vm_capable nodes
3372 for node in inst_config.all_nodes:
3373 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3374 instance, "instance lives on ghost node %s", node)
3375 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3376 instance, "instance lives on non-vm_capable node %s", node)
3378 feedback_fn("* Verifying orphan volumes")
3379 reserved = utils.FieldSet(*cluster.reserved_lvs)
3381 # We will get spurious "unknown volume" warnings if any node of this group
3382 # is secondary for an instance whose primary is in another group. To avoid
3383 # them, we find these instances and add their volumes to node_vol_should.
3384 for inst in self.all_inst_info.values():
3385 for secondary in inst.secondary_nodes:
3386 if (secondary in self.my_node_info
3387 and inst.name not in self.my_inst_info):
3388 inst.MapLVsByNode(node_vol_should)
3391 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3393 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3394 feedback_fn("* Verifying N+1 Memory redundancy")
3395 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3397 feedback_fn("* Other Notes")
3399 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3400 % len(i_non_redundant))
3402 if i_non_a_balanced:
3403 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3404 % len(i_non_a_balanced))
3407 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3410 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3413 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3417 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3418 """Analyze the post-hooks' result
3420 This method analyses the hook result, handles it, and sends some
3421 nicely-formatted feedback back to the user.
3423 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3424 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3425 @param hooks_results: the results of the multi-node hooks rpc call
3426 @param feedback_fn: function used to send feedback back to the caller
3427 @param lu_result: previous Exec result
3428 @return: the new Exec result, based on the previous result
3432 # We only really run POST phase hooks, only for non-empty groups,
3433 # and are only interested in their results
3434 if not self.my_node_names:
3437 elif phase == constants.HOOKS_PHASE_POST:
3438 # Used to change hooks' output to proper indentation
3439 feedback_fn("* Hooks Results")
3440 assert hooks_results, "invalid result from hooks"
3442 for node_name in hooks_results:
3443 res = hooks_results[node_name]
3445 test = msg and not res.offline
3446 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3447 "Communication failure in hooks execution: %s", msg)
3448 if res.offline or msg:
3449 # No need to investigate payload if node is offline or gave an error
3452 for script, hkr, output in res.payload:
3453 test = hkr == constants.HKR_FAIL
3454 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3455 "Script %s failed, output:", script)
3457 output = self._HOOKS_INDENT_RE.sub(" ", output)
3458 feedback_fn("%s" % output)
3464 class LUClusterVerifyDisks(NoHooksLU):
3465 """Verifies the cluster disks status.
3470 def ExpandNames(self):
3471 self.share_locks = _ShareAll()
3472 self.needed_locks = {
3473 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3476 def Exec(self, feedback_fn):
3477 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3479 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3480 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3481 for group in group_names])
3484 class LUGroupVerifyDisks(NoHooksLU):
3485 """Verifies the status of all disks in a node group.
3490 def ExpandNames(self):
3491 # Raises errors.OpPrereqError on its own if group can't be found
3492 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3494 self.share_locks = _ShareAll()
3495 self.needed_locks = {
3496 locking.LEVEL_INSTANCE: [],
3497 locking.LEVEL_NODEGROUP: [],
3498 locking.LEVEL_NODE: [],
3501 def DeclareLocks(self, level):
3502 if level == locking.LEVEL_INSTANCE:
3503 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3505 # Lock instances optimistically, needs verification once node and group
3506 # locks have been acquired
3507 self.needed_locks[locking.LEVEL_INSTANCE] = \
3508 self.cfg.GetNodeGroupInstances(self.group_uuid)
3510 elif level == locking.LEVEL_NODEGROUP:
3511 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3513 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3514 set([self.group_uuid] +
3515 # Lock all groups used by instances optimistically; this requires
3516 # going via the node before it's locked, requiring verification later on
3519 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3520 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3522 elif level == locking.LEVEL_NODE:
3523 # This will only lock the nodes in the group to be verified which contain actual instances
3525 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3526 self._LockInstancesNodes()
3528 # Lock all nodes in group to be verified
3529 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3530 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3531 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3533 def CheckPrereq(self):
3534 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3535 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3536 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3538 assert self.group_uuid in owned_groups
3540 # Check if locked instances are still correct
3541 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3543 # Get instance information
3544 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3546 # Check if node groups for locked instances are still correct
3547 _CheckInstancesNodeGroups(self.cfg, self.instances,
3548 owned_groups, owned_nodes, self.group_uuid)
3550 def Exec(self, feedback_fn):
3551 """Verify integrity of cluster disks.
3553 @rtype: tuple of three items
3554 @return: a tuple of (dict of node-to-node_error, list of instances
3555 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3560 res_instances = set()
3563 nv_dict = _MapInstanceDisksToNodes(
3564 [inst for inst in self.instances.values()
3565 if inst.admin_state == constants.ADMINST_UP])
3568 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3569 set(self.cfg.GetVmCapableNodeList()))
3571 node_lvs = self.rpc.call_lv_list(nodes, [])
3573 for (node, node_res) in node_lvs.items():
3574 if node_res.offline:
3577 msg = node_res.fail_msg
3579 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3580 res_nodes[node] = msg
3583 for lv_name, (_, _, lv_online) in node_res.payload.items():
3584 inst = nv_dict.pop((node, lv_name), None)
3585 if not (lv_online or inst is None):
3586 res_instances.add(inst)
3588 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3590 for key, inst in nv_dict.iteritems():
3591 res_missing.setdefault(inst, []).append(list(key))
3593 return (res_nodes, list(res_instances), res_missing)
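# Illustrative sketch (hypothetical values): the shape of the tuple returned
# above -- per-node error messages, instances that need their disks
# activated, and the logical volumes found missing, per instance.
result_example = (
  {"node3.example.com": "Error while listing LVs"},
  ["inst2.example.com"],
  {"inst5.example.com": [["node1.example.com", "xenvg/disk0"]]},
)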
3596 class LUClusterRepairDiskSizes(NoHooksLU):
3597 """Verifies the cluster disks sizes.
3602 def ExpandNames(self):
3603 if self.op.instances:
3604 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3605 self.needed_locks = {
3606 locking.LEVEL_NODE_RES: [],
3607 locking.LEVEL_INSTANCE: self.wanted_names,
3609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3611 self.wanted_names = None
3612 self.needed_locks = {
3613 locking.LEVEL_NODE_RES: locking.ALL_SET,
3614 locking.LEVEL_INSTANCE: locking.ALL_SET,
3616 self.share_locks = {
3617 locking.LEVEL_NODE_RES: 1,
3618 locking.LEVEL_INSTANCE: 0,
3621 def DeclareLocks(self, level):
3622 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3623 self._LockInstancesNodes(primary_only=True, level=level)
3625 def CheckPrereq(self):
3626 """Check prerequisites.
3628 This only checks the optional instance list against the existing names.
3631 if self.wanted_names is None:
3632 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3634 self.wanted_instances = \
3635 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3637 def _EnsureChildSizes(self, disk):
3638 """Ensure children of the disk have the needed disk size.
3640 This is valid mainly for DRBD8 and fixes an issue where the
3641 children have a smaller disk size.
3643 @param disk: an L{ganeti.objects.Disk} object
3646 if disk.dev_type == constants.LD_DRBD8:
3647 assert disk.children, "Empty children for DRBD8?"
3648 fchild = disk.children[0]
3649 mismatch = fchild.size < disk.size
3651 self.LogInfo("Child disk has size %d, parent %d, fixing",
3652 fchild.size, disk.size)
3653 fchild.size = disk.size
3655 # and we recurse on this child only, not on the metadev
3656 return self._EnsureChildSizes(fchild) or mismatch
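# Illustrative sketch (hypothetical sizes; plain dicts stand in for
# L{objects.Disk}): a DRBD8 disk whose data child records a smaller size gets
# the child grown to the parent's size, recursing down the data-device chain.
def _ensure_child_sizes_demo(disk):
  changed = False
  if disk.get("dev_type") == "drbd8" and disk.get("children"):
    data_child = disk["children"][0]
    if data_child["size"] < disk["size"]:
      data_child["size"] = disk["size"]   # e.g. 10000 -> 10240 MiB
      changed = True
    changed = _ensure_child_sizes_demo(data_child) or changed
  return changed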
3660 def Exec(self, feedback_fn):
3661 """Verify the size of cluster disks.
3664 # TODO: check child disks too
3665 # TODO: check differences in size between primary/secondary nodes
3667 for instance in self.wanted_instances:
3668 pnode = instance.primary_node
3669 if pnode not in per_node_disks:
3670 per_node_disks[pnode] = []
3671 for idx, disk in enumerate(instance.disks):
3672 per_node_disks[pnode].append((instance, idx, disk))
3674 assert not (frozenset(per_node_disks.keys()) -
3675 self.owned_locks(locking.LEVEL_NODE_RES)), \
3676 "Not owning correct locks"
3677 assert not self.owned_locks(locking.LEVEL_NODE)
3680 for node, dskl in per_node_disks.items():
3681 newl = [v[2].Copy() for v in dskl]
3683 self.cfg.SetDiskID(dsk, node)
3684 result = self.rpc.call_blockdev_getsize(node, newl)
3686 self.LogWarning("Failure in blockdev_getsize call to node"
3687 " %s, ignoring", node)
3689 if len(result.payload) != len(dskl):
3690 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3691 " result.payload=%s", node, len(dskl), result.payload)
3692 self.LogWarning("Invalid result from node %s, ignoring node results",
3695 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3697 self.LogWarning("Disk %d of instance %s did not return size"
3698 " information, ignoring", idx, instance.name)
3700 if not isinstance(size, (int, long)):
3701 self.LogWarning("Disk %d of instance %s did not return valid"
3702 " size information, ignoring", idx, instance.name)
3705 if size != disk.size:
3706 self.LogInfo("Disk %d of instance %s has mismatched size,"
3707 " correcting: recorded %d, actual %d", idx,
3708 instance.name, disk.size, size)
3710 self.cfg.Update(instance, feedback_fn)
3711 changed.append((instance.name, idx, size))
3712 if self._EnsureChildSizes(disk):
3713 self.cfg.Update(instance, feedback_fn)
3714 changed.append((instance.name, idx, disk.size))
3718 class LUClusterRename(LogicalUnit):
3719 """Rename the cluster.
3722 HPATH = "cluster-rename"
3723 HTYPE = constants.HTYPE_CLUSTER
3725 def BuildHooksEnv(self):
3730 "OP_TARGET": self.cfg.GetClusterName(),
3731 "NEW_NAME": self.op.name,
3734 def BuildHooksNodes(self):
3735 """Build hooks nodes.
3738 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3740 def CheckPrereq(self):
3741 """Verify that the passed name is a valid one.
3744 hostname = netutils.GetHostname(name=self.op.name,
3745 family=self.cfg.GetPrimaryIPFamily())
3747 new_name = hostname.name
3748 self.ip = new_ip = hostname.ip
3749 old_name = self.cfg.GetClusterName()
3750 old_ip = self.cfg.GetMasterIP()
3751 if new_name == old_name and new_ip == old_ip:
3752 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3753 " cluster has changed",
3755 if new_ip != old_ip:
3756 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3757 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3758 " reachable on the network" %
3759 new_ip, errors.ECODE_NOTUNIQUE)
3761 self.op.name = new_name
3763 def Exec(self, feedback_fn):
3764 """Rename the cluster.
3767 clustername = self.op.name
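# Sketch of the rename sequence: take the master IP down, rewrite the
# cluster name/IP in the configuration, regenerate and push the SSH
# known_hosts file, then bring the master IP back up.  "ems" below is the
# cluster-level flag for whether an external master-IP setup script
# should be used.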
3770 # shutdown the master IP
3771 master_params = self.cfg.GetMasterNetworkParameters()
3772 ems = self.cfg.GetUseExternalMipScript()
3773 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3775 result.Raise("Could not disable the master role")
3778 cluster = self.cfg.GetClusterInfo()
3779 cluster.cluster_name = clustername
3780 cluster.master_ip = new_ip
3781 self.cfg.Update(cluster, feedback_fn)
3783 # update the known hosts file
3784 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3785 node_list = self.cfg.GetOnlineNodeList()
3787 node_list.remove(master_params.name)
3790 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3792 master_params.ip = new_ip
3793 result = self.rpc.call_node_activate_master_ip(master_params.name,
3795 msg = result.fail_msg
3797 self.LogWarning("Could not re-enable the master role on"
3798 " the master, please restart manually: %s", msg)
3803 def _ValidateNetmask(cfg, netmask):
3804 """Checks if a netmask is valid.
3806 @type cfg: L{config.ConfigWriter}
3807 @param cfg: The cluster configuration
3809 @param netmask: the netmask to be verified
3810 @raise errors.OpPrereqError: if the validation fails
3813 ip_family = cfg.GetPrimaryIPFamily()
3815 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3816 except errors.ProgrammerError:
3817 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3818 ip_family, errors.ECODE_INVAL)
3819 if not ipcls.ValidateNetmask(netmask):
3820 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3821 (netmask), errors.ECODE_INVAL)
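# Note (sketch): the master netmask is handled as a CIDR prefix length, so
# on an IPv4 cluster a value like 24 would pass while an out-of-range value
# raises OpPrereqError; the exact accepted range comes from the IP class
# looked up above.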
3824 class LUClusterSetParams(LogicalUnit):
3825 """Change the parameters of the cluster.
3828 HPATH = "cluster-modify"
3829 HTYPE = constants.HTYPE_CLUSTER
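# This LU backs cluster-wide parameter changes (typically reached via
# "gnt-cluster modify"); most checks below run against every node, hence
# the ALL_SET locks taken in ExpandNames.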
3832 def CheckArguments(self):
3836 if self.op.uid_pool:
3837 uidpool.CheckUidPool(self.op.uid_pool)
3839 if self.op.add_uids:
3840 uidpool.CheckUidPool(self.op.add_uids)
3842 if self.op.remove_uids:
3843 uidpool.CheckUidPool(self.op.remove_uids)
3845 if self.op.master_netmask is not None:
3846 _ValidateNetmask(self.cfg, self.op.master_netmask)
3848 if self.op.diskparams:
3849 for dt_params in self.op.diskparams.values():
3850 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3852 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3853 except errors.OpPrereqError, err:
3854 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3857 def ExpandNames(self):
3858 # FIXME: in the future maybe other cluster params won't require checking on
3859 # all nodes to be modified.
3860 self.needed_locks = {
3861 locking.LEVEL_NODE: locking.ALL_SET,
3862 locking.LEVEL_INSTANCE: locking.ALL_SET,
3863 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3865 self.share_locks = {
3866 locking.LEVEL_NODE: 1,
3867 locking.LEVEL_INSTANCE: 1,
3868 locking.LEVEL_NODEGROUP: 1,
3871 def BuildHooksEnv(self):
3876 "OP_TARGET": self.cfg.GetClusterName(),
3877 "NEW_VG_NAME": self.op.vg_name,
3880 def BuildHooksNodes(self):
3881 """Build hooks nodes.
3884 mn = self.cfg.GetMasterNode()
3887 def CheckPrereq(self):
3888 """Check prerequisites.
3890 This checks whether the given parameters are consistent and
3891 whether the given volume group is valid.
3894 if self.op.vg_name is not None and not self.op.vg_name:
3895 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3896 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3897 " instances exist", errors.ECODE_INVAL)
3899 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3900 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3901 raise errors.OpPrereqError("Cannot disable drbd helper while"
3902 " drbd-based instances exist",
3905 node_list = self.owned_locks(locking.LEVEL_NODE)
3907 # if vg_name is not None, check the given volume group on all nodes
3909 vglist = self.rpc.call_vg_list(node_list)
3910 for node in node_list:
3911 msg = vglist[node].fail_msg
3913 # ignoring down node
3914 self.LogWarning("Error while gathering data on node %s"
3915 " (ignoring node): %s", node, msg)
3917 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3919 constants.MIN_VG_SIZE)
3921 raise errors.OpPrereqError("Error on node '%s': %s" %
3922 (node, vgstatus), errors.ECODE_ENVIRON)
3924 if self.op.drbd_helper:
3925 # checks given drbd helper on all nodes
3926 helpers = self.rpc.call_drbd_helper(node_list)
3927 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3929 self.LogInfo("Not checking drbd helper on offline node %s", node)
3931 msg = helpers[node].fail_msg
3933 raise errors.OpPrereqError("Error checking drbd helper on node"
3934 " '%s': %s" % (node, msg),
3935 errors.ECODE_ENVIRON)
3936 node_helper = helpers[node].payload
3937 if node_helper != self.op.drbd_helper:
3938 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3939 (node, node_helper), errors.ECODE_ENVIRON)
3941 self.cluster = cluster = self.cfg.GetClusterInfo()
3942 # validate params changes
3943 if self.op.beparams:
3944 objects.UpgradeBeParams(self.op.beparams)
3945 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3946 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3948 if self.op.ndparams:
3949 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3950 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3952 # TODO: we need a more general way to handle resetting
3953 # cluster-level parameters to default values
3954 if self.new_ndparams["oob_program"] == "":
3955 self.new_ndparams["oob_program"] = \
3956 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3958 if self.op.hv_state:
3959 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3960 self.cluster.hv_state_static)
3961 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3962 for hv, values in new_hv_state.items())
3964 if self.op.disk_state:
3965 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3966 self.cluster.disk_state_static)
3967 self.new_disk_state = \
3968 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3969 for name, values in svalues.items()))
3970 for storage, svalues in new_disk_state.items())
3973 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3976 all_instances = self.cfg.GetAllInstancesInfo().values()
3978 for group in self.cfg.GetAllNodeGroupsInfo().values():
3979 instances = frozenset([inst for inst in all_instances
3980 if compat.any(node in group.members
3981 for node in inst.all_nodes)])
3982 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3983 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3984 new = _ComputeNewInstanceViolations(ipol,
3985 new_ipolicy, instances)
3987 violations.update(new)
3990 self.LogWarning("After the ipolicy change the following instances"
3991 " violate them: %s",
3992 utils.CommaJoin(utils.NiceSort(violations)))
3994 if self.op.nicparams:
3995 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3996 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3997 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4000 # check all instances for consistency
4001 for instance in self.cfg.GetAllInstancesInfo().values():
4002 for nic_idx, nic in enumerate(instance.nics):
4003 params_copy = copy.deepcopy(nic.nicparams)
4004 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4006 # check parameter syntax
4008 objects.NIC.CheckParameterSyntax(params_filled)
4009 except errors.ConfigurationError, err:
4010 nic_errors.append("Instance %s, nic/%d: %s" %
4011 (instance.name, nic_idx, err))
4013 # if we're moving instances to routed, check that they have an ip
4014 target_mode = params_filled[constants.NIC_MODE]
4015 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4016 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4017 " address" % (instance.name, nic_idx))
4019 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4020 "\n".join(nic_errors), errors.ECODE_INVAL)
4022 # hypervisor list/parameters
4023 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4024 if self.op.hvparams:
4025 for hv_name, hv_dict in self.op.hvparams.items():
4026 if hv_name not in self.new_hvparams:
4027 self.new_hvparams[hv_name] = hv_dict
4029 self.new_hvparams[hv_name].update(hv_dict)
4031 # disk template parameters
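# (sketch) self.op.diskparams maps disk template name -> dict of options;
# new templates are added as-is, options for already-known templates are
# merged on top of the existing ones.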
4032 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4033 if self.op.diskparams:
4034 for dt_name, dt_params in self.op.diskparams.items():
4035 if dt_name not in self.new_diskparams:
4036 self.new_diskparams[dt_name] = dt_params
4038 self.new_diskparams[dt_name].update(dt_params)
4040 # os hypervisor parameters
4041 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4043 for os_name, hvs in self.op.os_hvp.items():
4044 if os_name not in self.new_os_hvp:
4045 self.new_os_hvp[os_name] = hvs
4047 for hv_name, hv_dict in hvs.items():
4048 if hv_name not in self.new_os_hvp[os_name]:
4049 self.new_os_hvp[os_name][hv_name] = hv_dict
4051 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4054 self.new_osp = objects.FillDict(cluster.osparams, {})
4055 if self.op.osparams:
4056 for os_name, osp in self.op.osparams.items():
4057 if os_name not in self.new_osp:
4058 self.new_osp[os_name] = {}
4060 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4063 if not self.new_osp[os_name]:
4064 # we removed all parameters
4065 del self.new_osp[os_name]
4067 # check the parameter validity (remote check)
4068 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4069 os_name, self.new_osp[os_name])
4071 # changes to the hypervisor list
4072 if self.op.enabled_hypervisors is not None:
4073 self.hv_list = self.op.enabled_hypervisors
4074 for hv in self.hv_list:
4075 # if the hypervisor doesn't already exist in the cluster
4076 # hvparams, we initialize it to empty, and then (in both
4077 # cases) we make sure to fill the defaults, as we might not
4078 # have a complete defaults list if the hypervisor wasn't enabled before
4080 if hv not in new_hvp:
4082 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4083 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4085 self.hv_list = cluster.enabled_hypervisors
4087 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4088 # either the enabled list has changed, or the parameters have, validate
4089 for hv_name, hv_params in self.new_hvparams.items():
4090 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4091 (self.op.enabled_hypervisors and
4092 hv_name in self.op.enabled_hypervisors)):
4093 # either this is a new hypervisor, or its parameters have changed
4094 hv_class = hypervisor.GetHypervisor(hv_name)
4095 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4096 hv_class.CheckParameterSyntax(hv_params)
4097 _CheckHVParams(self, node_list, hv_name, hv_params)
4100 # no need to check any newly-enabled hypervisors, since the
4101 # defaults have already been checked in the above code-block
4102 for os_name, os_hvp in self.new_os_hvp.items():
4103 for hv_name, hv_params in os_hvp.items():
4104 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4105 # we need to fill in the new os_hvp on top of the actual hv_p
4106 cluster_defaults = self.new_hvparams.get(hv_name, {})
4107 new_osp = objects.FillDict(cluster_defaults, hv_params)
4108 hv_class = hypervisor.GetHypervisor(hv_name)
4109 hv_class.CheckParameterSyntax(new_osp)
4110 _CheckHVParams(self, node_list, hv_name, new_osp)
4112 if self.op.default_iallocator:
4113 alloc_script = utils.FindFile(self.op.default_iallocator,
4114 constants.IALLOCATOR_SEARCH_PATH,
4116 if alloc_script is None:
4117 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4118 " specified" % self.op.default_iallocator,
4121 def Exec(self, feedback_fn):
4122 """Change the parameters of the cluster.
4125 if self.op.vg_name is not None:
4126 new_volume = self.op.vg_name
4129 if new_volume != self.cfg.GetVGName():
4130 self.cfg.SetVGName(new_volume)
4132 feedback_fn("Cluster LVM configuration already in desired"
4133 " state, not changing")
4134 if self.op.drbd_helper is not None:
4135 new_helper = self.op.drbd_helper
4138 if new_helper != self.cfg.GetDRBDHelper():
4139 self.cfg.SetDRBDHelper(new_helper)
4141 feedback_fn("Cluster DRBD helper already in desired state,"
4143 if self.op.hvparams:
4144 self.cluster.hvparams = self.new_hvparams
4146 self.cluster.os_hvp = self.new_os_hvp
4147 if self.op.enabled_hypervisors is not None:
4148 self.cluster.hvparams = self.new_hvparams
4149 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4150 if self.op.beparams:
4151 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4152 if self.op.nicparams:
4153 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4155 self.cluster.ipolicy = self.new_ipolicy
4156 if self.op.osparams:
4157 self.cluster.osparams = self.new_osp
4158 if self.op.ndparams:
4159 self.cluster.ndparams = self.new_ndparams
4160 if self.op.diskparams:
4161 self.cluster.diskparams = self.new_diskparams
4162 if self.op.hv_state:
4163 self.cluster.hv_state_static = self.new_hv_state
4164 if self.op.disk_state:
4165 self.cluster.disk_state_static = self.new_disk_state
4167 if self.op.candidate_pool_size is not None:
4168 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4169 # we need to update the pool size here, otherwise the save will fail
4170 _AdjustCandidatePool(self, [])
4172 if self.op.maintain_node_health is not None:
4173 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4174 feedback_fn("Note: CONFD was disabled at build time, node health"
4175 " maintenance is not useful (still enabling it)")
4176 self.cluster.maintain_node_health = self.op.maintain_node_health
4178 if self.op.prealloc_wipe_disks is not None:
4179 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4181 if self.op.add_uids is not None:
4182 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4184 if self.op.remove_uids is not None:
4185 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4187 if self.op.uid_pool is not None:
4188 self.cluster.uid_pool = self.op.uid_pool
4190 if self.op.default_iallocator is not None:
4191 self.cluster.default_iallocator = self.op.default_iallocator
4193 if self.op.reserved_lvs is not None:
4194 self.cluster.reserved_lvs = self.op.reserved_lvs
4196 if self.op.use_external_mip_script is not None:
4197 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4199 def helper_os(aname, mods, desc):
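# Small helper used for the hidden_os / blacklisted_os lists below:
# "mods" is a list of (DDM_ADD|DDM_REMOVE, os_name) pairs; entries that are
# already present (or missing, for removals) are only reported via
# feedback_fn rather than treated as errors.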
4201 lst = getattr(self.cluster, aname)
4202 for key, val in mods:
4203 if key == constants.DDM_ADD:
4205 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4208 elif key == constants.DDM_REMOVE:
4212 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4214 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4216 if self.op.hidden_os:
4217 helper_os("hidden_os", self.op.hidden_os, "hidden")
4219 if self.op.blacklisted_os:
4220 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4222 if self.op.master_netdev:
4223 master_params = self.cfg.GetMasterNetworkParameters()
4224 ems = self.cfg.GetUseExternalMipScript()
4225 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4226 self.cluster.master_netdev)
4227 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4229 result.Raise("Could not disable the master ip")
4230 feedback_fn("Changing master_netdev from %s to %s" %
4231 (master_params.netdev, self.op.master_netdev))
4232 self.cluster.master_netdev = self.op.master_netdev
4234 if self.op.master_netmask:
4235 master_params = self.cfg.GetMasterNetworkParameters()
4236 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4237 result = self.rpc.call_node_change_master_netmask(master_params.name,
4238 master_params.netmask,
4239 self.op.master_netmask,
4241 master_params.netdev)
4243 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4246 self.cluster.master_netmask = self.op.master_netmask
4248 self.cfg.Update(self.cluster, feedback_fn)
4250 if self.op.master_netdev:
4251 master_params = self.cfg.GetMasterNetworkParameters()
4252 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4253 self.op.master_netdev)
4254 ems = self.cfg.GetUseExternalMipScript()
4255 result = self.rpc.call_node_activate_master_ip(master_params.name,
4258 self.LogWarning("Could not re-enable the master ip on"
4259 " the master, please restart manually: %s",
4263 def _UploadHelper(lu, nodes, fname):
4264 """Helper for uploading a file and showing warnings.
4267 if os.path.exists(fname):
4268 result = lu.rpc.call_upload_file(nodes, fname)
4269 for to_node, to_result in result.items():
4270 msg = to_result.fail_msg
4272 msg = ("Copy of file %s to node %s failed: %s" %
4273 (fname, to_node, msg))
4274 lu.proc.LogWarning(msg)
4277 def _ComputeAncillaryFiles(cluster, redist):
4278 """Compute files external to Ganeti which need to be consistent.
4280 @type redist: boolean
4281 @param redist: Whether to include files which need to be redistributed
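@return: a tuple of (files for all nodes, optional files, files only for
    master candidates, files only for vm-capable nodes)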
4284 # Compute files for all nodes
4286 pathutils.SSH_KNOWN_HOSTS_FILE,
4287 pathutils.CONFD_HMAC_KEY,
4288 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4289 pathutils.SPICE_CERT_FILE,
4290 pathutils.SPICE_CACERT_FILE,
4291 pathutils.RAPI_USERS_FILE,
4295 files_all.update(pathutils.ALL_CERT_FILES)
4296 files_all.update(ssconf.SimpleStore().GetFileList())
4298 # we need to ship at least the RAPI certificate
4299 files_all.add(pathutils.RAPI_CERT_FILE)
4301 if cluster.modify_etc_hosts:
4302 files_all.add(constants.ETC_HOSTS)
4304 if cluster.use_external_mip_script:
4305 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4307 # Files which are optional, these must:
4308 # - be present in one other category as well
4309 # - either exist or not exist on all nodes of that category (mc, vm all)
4311 pathutils.RAPI_USERS_FILE,
4314 # Files which should only be on master candidates
4318 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4320 # Files which should only be on VM-capable nodes
4323 for hv_name in cluster.enabled_hypervisors
4324 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4328 for hv_name in cluster.enabled_hypervisors
4329 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4331 # Filenames in each category must be unique
4332 all_files_set = files_all | files_mc | files_vm
4333 assert (len(all_files_set) ==
4334 sum(map(len, [files_all, files_mc, files_vm]))), \
4335 "Found file listed in more than one file list"
4337 # Optional files must be present in one other category
4338 assert all_files_set.issuperset(files_opt), \
4339 "Optional file not in a different required list"
4341 return (files_all, files_opt, files_mc, files_vm)
4344 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4345 """Distribute additional files which are part of the cluster configuration.
4347 ConfigWriter takes care of distributing the config and ssconf files, but
4348 there are more files which should be distributed to all nodes. This function
4349 makes sure those are copied.
4351 @param lu: calling logical unit
4352 @param additional_nodes: list of nodes not in the config to distribute to
4353 @type additional_vm: boolean
4354 @param additional_vm: whether the additional nodes are vm-capable or not
4357 # Gather target nodes
4358 cluster = lu.cfg.GetClusterInfo()
4359 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4361 online_nodes = lu.cfg.GetOnlineNodeList()
4362 online_set = frozenset(online_nodes)
4363 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4365 if additional_nodes is not None:
4366 online_nodes.extend(additional_nodes)
4368 vm_nodes.extend(additional_nodes)
4370 # Never distribute to master node
4371 for nodelist in [online_nodes, vm_nodes]:
4372 if master_info.name in nodelist:
4373 nodelist.remove(master_info.name)
4376 (files_all, _, files_mc, files_vm) = \
4377 _ComputeAncillaryFiles(cluster, True)
4379 # Never re-distribute configuration file from here
4380 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4381 pathutils.CLUSTER_CONF_FILE in files_vm)
4382 assert not files_mc, "Master candidates not handled in this function"
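# Upload map (sketch): each entry pairs a target node list with the set of
# files that has to be pushed to those nodes via _UploadHelper.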
4385 (online_nodes, files_all),
4386 (vm_nodes, files_vm),
4390 for (node_list, files) in filemap:
4392 _UploadHelper(lu, node_list, fname)
4395 class LUClusterRedistConf(NoHooksLU):
4396 """Force the redistribution of cluster configuration.
4398 This is a very simple LU.
4403 def ExpandNames(self):
4404 self.needed_locks = {
4405 locking.LEVEL_NODE: locking.ALL_SET,
4407 self.share_locks[locking.LEVEL_NODE] = 1
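# shared node locks suffice here: nothing on the nodes is modified beyond
# pushing out the current configuration and ancillary files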
4409 def Exec(self, feedback_fn):
4410 """Redistribute the configuration.
4413 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4414 _RedistributeAncillaryFiles(self)
4417 class LUClusterActivateMasterIp(NoHooksLU):
4418 """Activate the master IP on the master node.
4421 def Exec(self, feedback_fn):
4422 """Activate the master IP.
4425 master_params = self.cfg.GetMasterNetworkParameters()
4426 ems = self.cfg.GetUseExternalMipScript()
4427 result = self.rpc.call_node_activate_master_ip(master_params.name,
4429 result.Raise("Could not activate the master IP")
4432 class LUClusterDeactivateMasterIp(NoHooksLU):
4433 """Deactivate the master IP on the master node.
4436 def Exec(self, feedback_fn):
4437 """Deactivate the master IP.
4440 master_params = self.cfg.GetMasterNetworkParameters()
4441 ems = self.cfg.GetUseExternalMipScript()
4442 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4444 result.Raise("Could not deactivate the master IP")
4447 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4448 """Sleep and poll for an instance's disk to sync.
4451 if not instance.disks or disks is not None and not disks:
4454 disks = _ExpandCheckDisks(instance, disks)
4457 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4459 node = instance.primary_node
4462 lu.cfg.SetDiskID(dev, node)
4464 # TODO: Convert to utils.Retry
4467 degr_retries = 10 # in seconds, as we sleep 1 second each time
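# Polling loop (sketch): ask the primary node for the mirror status of all
# disks, report sync progress, and sleep between rounds; when the devices
# look done but still degraded we retry a few times to rule out a
# transient state (see the comment further down).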
4471 cumul_degraded = False
4472 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4473 msg = rstats.fail_msg
4475 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4478 raise errors.RemoteError("Can't contact node %s for mirror data,"
4479 " aborting." % node)
4482 rstats = rstats.payload
4484 for i, mstat in enumerate(rstats):
4486 lu.LogWarning("Can't compute data for node %s/%s",
4487 node, disks[i].iv_name)
4490 cumul_degraded = (cumul_degraded or
4491 (mstat.is_degraded and mstat.sync_percent is None))
4492 if mstat.sync_percent is not None:
4494 if mstat.estimated_time is not None:
4495 rem_time = ("%s remaining (estimated)" %
4496 utils.FormatSeconds(mstat.estimated_time))
4497 max_time = mstat.estimated_time
4499 rem_time = "no time estimate"
4500 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4501 (disks[i].iv_name, mstat.sync_percent, rem_time))
4503 # if we're done but degraded, let's do a few small retries, to
4504 # make sure we see a stable and not transient situation; therefore
4505 # we force restart of the loop
4506 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4507 logging.info("Degraded disks found, %d retries left", degr_retries)
4515 time.sleep(min(60, max_time))
4518 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4519 return not cumul_degraded
4522 def _BlockdevFind(lu, node, dev, instance):
4523 """Wrapper around call_blockdev_find to annotate diskparams.
4525 @param lu: A reference to the lu object
4526 @param node: The node to call out
4527 @param dev: The device to find
4528 @param instance: The instance object the device belongs to
4529 @returns The result of the rpc call
4532 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4533 return lu.rpc.call_blockdev_find(node, disk)
4536 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4537 """Wrapper around L{_CheckDiskConsistencyInner}.
4540 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4541 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4545 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4547 """Check that mirrors are not degraded.
4549 @attention: The device has to be annotated already.
4551 The ldisk parameter, if True, will change the test from the
4552 is_degraded attribute (which represents overall non-ok status for
4553 the device(s)) to the ldisk (representing the local storage status).
4556 lu.cfg.SetDiskID(dev, node)
4560 if on_primary or dev.AssembleOnSecondary():
4561 rstats = lu.rpc.call_blockdev_find(node, dev)
4562 msg = rstats.fail_msg
4564 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4566 elif not rstats.payload:
4567 lu.LogWarning("Can't find disk on node %s", node)
4571 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4573 result = result and not rstats.payload.is_degraded
4576 for child in dev.children:
4577 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4583 class LUOobCommand(NoHooksLU):
4584 """Logical unit for OOB handling.
4588 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
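# Out-of-band handling (sketch): the node's OOB helper is run through the
# master node via call_run_oob for each target node.  For power off/cycle
# the master itself is skipped when no node list is given, and explicitly
# naming it is rejected in CheckPrereq.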
4590 def ExpandNames(self):
4591 """Gather locks we need.
4594 if self.op.node_names:
4595 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4596 lock_names = self.op.node_names
4598 lock_names = locking.ALL_SET
4600 self.needed_locks = {
4601 locking.LEVEL_NODE: lock_names,
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4608 - the node exists in the configuration
4611 Any errors are signaled by raising errors.OpPrereqError.
4615 self.master_node = self.cfg.GetMasterNode()
4617 assert self.op.power_delay >= 0.0
4619 if self.op.node_names:
4620 if (self.op.command in self._SKIP_MASTER and
4621 self.master_node in self.op.node_names):
4622 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4623 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4625 if master_oob_handler:
4626 additional_text = ("run '%s %s %s' if you want to operate on the"
4627 " master regardless") % (master_oob_handler,
4631 additional_text = "it does not support out-of-band operations"
4633 raise errors.OpPrereqError(("Operating on the master node %s is not"
4634 " allowed for %s; %s") %
4635 (self.master_node, self.op.command,
4636 additional_text), errors.ECODE_INVAL)
4638 self.op.node_names = self.cfg.GetNodeList()
4639 if self.op.command in self._SKIP_MASTER:
4640 self.op.node_names.remove(self.master_node)
4642 if self.op.command in self._SKIP_MASTER:
4643 assert self.master_node not in self.op.node_names
4645 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4647 raise errors.OpPrereqError("Node %s not found" % node_name,
4650 self.nodes.append(node)
4652 if (not self.op.ignore_status and
4653 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4654 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4655 " not marked offline") % node_name,
4658 def Exec(self, feedback_fn):
4659 """Execute OOB and return result if we expect any.
4662 master_node = self.master_node
4665 for idx, node in enumerate(utils.NiceSort(self.nodes,
4666 key=lambda node: node.name)):
4667 node_entry = [(constants.RS_NORMAL, node.name)]
4668 ret.append(node_entry)
4670 oob_program = _SupportsOob(self.cfg, node)
4673 node_entry.append((constants.RS_UNAVAIL, None))
4676 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4677 self.op.command, oob_program, node.name)
4678 result = self.rpc.call_run_oob(master_node, oob_program,
4679 self.op.command, node.name,
4683 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4684 node.name, result.fail_msg)
4685 node_entry.append((constants.RS_NODATA, None))
4688 self._CheckPayload(result)
4689 except errors.OpExecError, err:
4690 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4692 node_entry.append((constants.RS_NODATA, None))
4694 if self.op.command == constants.OOB_HEALTH:
4695 # For health we should log important events
4696 for item, status in result.payload:
4697 if status in [constants.OOB_STATUS_WARNING,
4698 constants.OOB_STATUS_CRITICAL]:
4699 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4700 item, node.name, status)
4702 if self.op.command == constants.OOB_POWER_ON:
4704 elif self.op.command == constants.OOB_POWER_OFF:
4705 node.powered = False
4706 elif self.op.command == constants.OOB_POWER_STATUS:
4707 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4708 if powered != node.powered:
4709 logging.warning(("Recorded power state (%s) of node '%s' does not"
4710 " match actual power state (%s)"), node.powered,
4713 # For configuration changing commands we should update the node
4714 if self.op.command in (constants.OOB_POWER_ON,
4715 constants.OOB_POWER_OFF):
4716 self.cfg.Update(node, feedback_fn)
4718 node_entry.append((constants.RS_NORMAL, result.payload))
4720 if (self.op.command == constants.OOB_POWER_ON and
4721 idx < len(self.nodes) - 1):
4722 time.sleep(self.op.power_delay)
4726 def _CheckPayload(self, result):
4727 """Checks if the payload is valid.
4729 @param result: RPC result
4730 @raises errors.OpExecError: If payload is not valid
4734 if self.op.command == constants.OOB_HEALTH:
4735 if not isinstance(result.payload, list):
4736 errs.append("command 'health' is expected to return a list but got %s" %
4737 type(result.payload))
4739 for item, status in result.payload:
4740 if status not in constants.OOB_STATUSES:
4741 errs.append("health item '%s' has invalid status '%s'" %
4744 if self.op.command == constants.OOB_POWER_STATUS:
4745 if not isinstance(result.payload, dict):
4746 errs.append("power-status is expected to return a dict but got %s" %
4747 type(result.payload))
4749 if self.op.command in [
4750 constants.OOB_POWER_ON,
4751 constants.OOB_POWER_OFF,
4752 constants.OOB_POWER_CYCLE,
4754 if result.payload is not None:
4755 errs.append("%s is expected to not return payload but got '%s'" %
4756 (self.op.command, result.payload))
4759 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4760 utils.CommaJoin(errs))
4763 class _OsQuery(_QueryBase):
4764 FIELDS = query.OS_FIELDS
4766 def ExpandNames(self, lu):
4767 # Lock all nodes in shared mode
4768 # Temporary removal of locks, should be reverted later
4769 # TODO: reintroduce locks when they are lighter-weight
4770 lu.needed_locks = {}
4771 #self.share_locks[locking.LEVEL_NODE] = 1
4772 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4774 # The following variables interact with _QueryBase._GetNames
4776 self.wanted = self.names
4778 self.wanted = locking.ALL_SET
4780 self.do_locking = self.use_locking
4782 def DeclareLocks(self, lu, level):
4786 def _DiagnoseByOS(rlist):
4787 """Remaps a per-node return list into an a per-os per-node dictionary
4789 @param rlist: a map with node names as keys and OS objects as values
4792 @return: a dictionary with osnames as keys and as value another
4793 map, with nodes as keys and tuples of (path, status, diagnose,
4794 variants, parameters, api_versions) as values, eg::
4796 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4797 (/srv/..., False, "invalid api")],
4798 "node2": [(/srv/..., True, "", [], [])]}
4803 # we build here the list of nodes that didn't fail the RPC (at RPC
4804 # level), so that nodes with a non-responding node daemon don't
4805 # make all OSes invalid
4806 good_nodes = [node_name for node_name in rlist
4807 if not rlist[node_name].fail_msg]
4808 for node_name, nr in rlist.items():
4809 if nr.fail_msg or not nr.payload:
4811 for (name, path, status, diagnose, variants,
4812 params, api_versions) in nr.payload:
4813 if name not in all_os:
4814 # build a list of nodes for this os containing empty lists
4815 # for each node in node_list
4817 for nname in good_nodes:
4818 all_os[name][nname] = []
4819 # convert params from [name, help] to (name, help)
4820 params = [tuple(v) for v in params]
4821 all_os[name][node_name].append((path, status, diagnose,
4822 variants, params, api_versions))
4825 def _GetQueryData(self, lu):
4826 """Computes the list of nodes and their attributes.
4829 # Locking is not used
4830 assert not (compat.any(lu.glm.is_owned(level)
4831 for level in locking.LEVELS
4832 if level != locking.LEVEL_CLUSTER) or
4833 self.do_locking or self.use_locking)
4835 valid_nodes = [node.name
4836 for node in lu.cfg.GetAllNodesInfo().values()
4837 if not node.offline and node.vm_capable]
4838 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4839 cluster = lu.cfg.GetClusterInfo()
4843 for (os_name, os_data) in pol.items():
4844 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4845 hidden=(os_name in cluster.hidden_os),
4846 blacklisted=(os_name in cluster.blacklisted_os))
4850 api_versions = set()
4852 for idx, osl in enumerate(os_data.values()):
4853 info.valid = bool(info.valid and osl and osl[0][1])
4857 (node_variants, node_params, node_api) = osl[0][3:6]
4860 variants.update(node_variants)
4861 parameters.update(node_params)
4862 api_versions.update(node_api)
4864 # Filter out inconsistent values
4865 variants.intersection_update(node_variants)
4866 parameters.intersection_update(node_params)
4867 api_versions.intersection_update(node_api)
4869 info.variants = list(variants)
4870 info.parameters = list(parameters)
4871 info.api_versions = list(api_versions)
4873 data[os_name] = info
4875 # Prepare data in requested order
4876 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4880 class LUOsDiagnose(NoHooksLU):
4881 """Logical unit for OS diagnose/query.
4887 def _BuildFilter(fields, names):
4888 """Builds a filter for querying OSes.
4891 name_filter = qlang.MakeSimpleFilter("name", names)
4893 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4894 # respective field is not requested
4895 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4896 for fname in ["hidden", "blacklisted"]
4897 if fname not in fields]
4898 if "valid" not in fields:
4899 status_filter.append([qlang.OP_TRUE, "valid"])
4902 status_filter.insert(0, qlang.OP_AND)
4904 status_filter = None
4906 if name_filter and status_filter:
4907 return [qlang.OP_AND, name_filter, status_filter]
4911 return status_filter
4913 def CheckArguments(self):
4914 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4915 self.op.output_fields, False)
4917 def ExpandNames(self):
4918 self.oq.ExpandNames(self)
4920 def Exec(self, feedback_fn):
4921 return self.oq.OldStyleQuery(self)
4924 class LUNodeRemove(LogicalUnit):
4925 """Logical unit for removing a node.
4928 HPATH = "node-remove"
4929 HTYPE = constants.HTYPE_NODE
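# Removing a node (typically reached via "gnt-node remove") promotes other
# nodes to master candidate as needed, runs the post hooks on the node, and
# only then tells the node daemon to leave the cluster (see Exec below).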
4931 def BuildHooksEnv(self):
4936 "OP_TARGET": self.op.node_name,
4937 "NODE_NAME": self.op.node_name,
4940 def BuildHooksNodes(self):
4941 """Build hooks nodes.
4943 This doesn't run on the target node in the pre phase as a failed
4944 node would then be impossible to remove.
4947 all_nodes = self.cfg.GetNodeList()
4949 all_nodes.remove(self.op.node_name)
4952 return (all_nodes, all_nodes)
4954 def CheckPrereq(self):
4955 """Check prerequisites.
4958 - the node exists in the configuration
4959 - it does not have primary or secondary instances
4960 - it's not the master
4962 Any errors are signaled by raising errors.OpPrereqError.
4965 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4966 node = self.cfg.GetNodeInfo(self.op.node_name)
4967 assert node is not None
4969 masternode = self.cfg.GetMasterNode()
4970 if node.name == masternode:
4971 raise errors.OpPrereqError("Node is the master node, failover to another"
4972 " node is required", errors.ECODE_INVAL)
4974 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4975 if node.name in instance.all_nodes:
4976 raise errors.OpPrereqError("Instance %s is still running on the node,"
4977 " please remove first" % instance_name,
4979 self.op.node_name = node.name
4982 def Exec(self, feedback_fn):
4983 """Removes the node from the cluster.
4987 logging.info("Stopping the node daemon and removing configs from node %s",
4990 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4992 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4995 # Promote nodes to master candidate as needed
4996 _AdjustCandidatePool(self, exceptions=[node.name])
4997 self.context.RemoveNode(node.name)
4999 # Run post hooks on the node before it's removed
5000 _RunPostHook(self, node.name)
5002 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5003 msg = result.fail_msg
5005 self.LogWarning("Errors encountered on the remote node while leaving"
5006 " the cluster: %s", msg)
5008 # Remove node from our /etc/hosts
5009 if self.cfg.GetClusterInfo().modify_etc_hosts:
5010 master_node = self.cfg.GetMasterNode()
5011 result = self.rpc.call_etc_hosts_modify(master_node,
5012 constants.ETC_HOSTS_REMOVE,
5014 result.Raise("Can't update hosts file with new host data")
5015 _RedistributeAncillaryFiles(self)
5018 class _NodeQuery(_QueryBase):
5019 FIELDS = query.NODE_FIELDS
5021 def ExpandNames(self, lu):
5022 lu.needed_locks = {}
5023 lu.share_locks = _ShareAll()
5026 self.wanted = _GetWantedNodes(lu, self.names)
5028 self.wanted = locking.ALL_SET
5030 self.do_locking = (self.use_locking and
5031 query.NQ_LIVE in self.requested_data)
5034 # If any non-static field is requested we need to lock the nodes
5035 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5037 def DeclareLocks(self, lu, level):
5040 def _GetQueryData(self, lu):
5041 """Computes the list of nodes and their attributes.
5044 all_info = lu.cfg.GetAllNodesInfo()
5046 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5048 # Gather data as requested
5049 if query.NQ_LIVE in self.requested_data:
5050 # filter out non-vm_capable nodes
5051 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5053 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5054 [lu.cfg.GetHypervisorType()])
5055 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5056 for (name, nresult) in node_data.items()
5057 if not nresult.fail_msg and nresult.payload)
5061 if query.NQ_INST in self.requested_data:
5062 node_to_primary = dict([(name, set()) for name in nodenames])
5063 node_to_secondary = dict([(name, set()) for name in nodenames])
5065 inst_data = lu.cfg.GetAllInstancesInfo()
5067 for inst in inst_data.values():
5068 if inst.primary_node in node_to_primary:
5069 node_to_primary[inst.primary_node].add(inst.name)
5070 for secnode in inst.secondary_nodes:
5071 if secnode in node_to_secondary:
5072 node_to_secondary[secnode].add(inst.name)
5074 node_to_primary = None
5075 node_to_secondary = None
5077 if query.NQ_OOB in self.requested_data:
5078 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5079 for name, node in all_info.iteritems())
5083 if query.NQ_GROUP in self.requested_data:
5084 groups = lu.cfg.GetAllNodeGroupsInfo()
5088 return query.NodeQueryData([all_info[name] for name in nodenames],
5089 live_data, lu.cfg.GetMasterNode(),
5090 node_to_primary, node_to_secondary, groups,
5091 oob_support, lu.cfg.GetClusterInfo())
5094 class LUNodeQuery(NoHooksLU):
5095 """Logical unit for querying nodes.
5098 # pylint: disable=W0142
5101 def CheckArguments(self):
5102 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5103 self.op.output_fields, self.op.use_locking)
5105 def ExpandNames(self):
5106 self.nq.ExpandNames(self)
5108 def DeclareLocks(self, level):
5109 self.nq.DeclareLocks(self, level)
5111 def Exec(self, feedback_fn):
5112 return self.nq.OldStyleQuery(self)
5115 class LUNodeQueryvols(NoHooksLU):
5116 """Logical unit for getting volumes on node(s).
5120 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5121 _FIELDS_STATIC = utils.FieldSet("node")
5123 def CheckArguments(self):
5124 _CheckOutputFields(static=self._FIELDS_STATIC,
5125 dynamic=self._FIELDS_DYNAMIC,
5126 selected=self.op.output_fields)
5128 def ExpandNames(self):
5129 self.share_locks = _ShareAll()
5130 self.needed_locks = {}
5132 if not self.op.nodes:
5133 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5135 self.needed_locks[locking.LEVEL_NODE] = \
5136 _GetWantedNodes(self, self.op.nodes)
5138 def Exec(self, feedback_fn):
5139 """Computes the list of nodes and their attributes.
5142 nodenames = self.owned_locks(locking.LEVEL_NODE)
5143 volumes = self.rpc.call_node_volumes(nodenames)
5145 ilist = self.cfg.GetAllInstancesInfo()
5146 vol2inst = _MapInstanceDisksToNodes(ilist.values())
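# vol2inst maps (node name, "vg/lv") -> instance name, so the "instance"
# output field below can be filled in; "-" is used for volumes not owned
# by any instance.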
5149 for node in nodenames:
5150 nresult = volumes[node]
5153 msg = nresult.fail_msg
5155 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5158 node_vols = sorted(nresult.payload,
5159 key=operator.itemgetter("dev"))
5161 for vol in node_vols:
5163 for field in self.op.output_fields:
5166 elif field == "phys":
5170 elif field == "name":
5172 elif field == "size":
5173 val = int(float(vol["size"]))
5174 elif field == "instance":
5175 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5177 raise errors.ParameterError(field)
5178 node_output.append(str(val))
5180 output.append(node_output)
5185 class LUNodeQueryStorage(NoHooksLU):
5186 """Logical unit for getting information on storage units on node(s).
5189 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5192 def CheckArguments(self):
5193 _CheckOutputFields(static=self._FIELDS_STATIC,
5194 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5195 selected=self.op.output_fields)
5197 def ExpandNames(self):
5198 self.share_locks = _ShareAll()
5199 self.needed_locks = {}
5202 self.needed_locks[locking.LEVEL_NODE] = \
5203 _GetWantedNodes(self, self.op.nodes)
5205 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5207 def Exec(self, feedback_fn):
5208 """Computes the list of nodes and their attributes.
5211 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5213 # Always get name to sort by
5214 if constants.SF_NAME in self.op.output_fields:
5215 fields = self.op.output_fields[:]
5217 fields = [constants.SF_NAME] + self.op.output_fields
5219 # Never ask for node or type as it's only known to the LU
5220 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5221 while extra in fields:
5222 fields.remove(extra)
5224 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5225 name_idx = field_idx[constants.SF_NAME]
5227 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5228 data = self.rpc.call_storage_list(self.nodes,
5229 self.op.storage_type, st_args,
5230 self.op.name, fields)
5234 for node in utils.NiceSort(self.nodes):
5235 nresult = data[node]
5239 msg = nresult.fail_msg
5241 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5244 rows = dict([(row[name_idx], row) for row in nresult.payload])
5246 for name in utils.NiceSort(rows.keys()):
5251 for field in self.op.output_fields:
5252 if field == constants.SF_NODE:
5254 elif field == constants.SF_TYPE:
5255 val = self.op.storage_type
5256 elif field in field_idx:
5257 val = row[field_idx[field]]
5259 raise errors.ParameterError(field)
5268 class _InstanceQuery(_QueryBase):
5269 FIELDS = query.INSTANCE_FIELDS
5271 def ExpandNames(self, lu):
5272 lu.needed_locks = {}
5273 lu.share_locks = _ShareAll()
5276 self.wanted = _GetWantedInstances(lu, self.names)
5278 self.wanted = locking.ALL_SET
5280 self.do_locking = (self.use_locking and
5281 query.IQ_LIVE in self.requested_data)
5283 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5284 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5285 lu.needed_locks[locking.LEVEL_NODE] = []
5286 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5288 self.do_grouplocks = (self.do_locking and
5289 query.IQ_NODES in self.requested_data)
5291 def DeclareLocks(self, lu, level):
5293 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5294 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5296 # Lock all groups used by instances optimistically; this requires going
5297 # via the node before it's locked, requiring verification later on
5298 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5300 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5301 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5302 elif level == locking.LEVEL_NODE:
5303 lu._LockInstancesNodes() # pylint: disable=W0212
5306 def _CheckGroupLocks(lu):
5307 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5308 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5310 # Check if node groups for locked instances are still correct
5311 for instance_name in owned_instances:
5312 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5314 def _GetQueryData(self, lu):
5315 """Computes the list of instances and their attributes.
5318 if self.do_grouplocks:
5319 self._CheckGroupLocks(lu)
5321 cluster = lu.cfg.GetClusterInfo()
5322 all_info = lu.cfg.GetAllInstancesInfo()
5324 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5326 instance_list = [all_info[name] for name in instance_names]
5327 nodes = frozenset(itertools.chain(*(inst.all_nodes
5328 for inst in instance_list)))
5329 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5332 wrongnode_inst = set()
5334 # Gather data as requested
5335 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5337 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5339 result = node_data[name]
5341 # offline nodes will be in both lists
5342 assert result.fail_msg
5343 offline_nodes.append(name)
5345 bad_nodes.append(name)
5346 elif result.payload:
5347 for inst in result.payload:
5348 if inst in all_info:
5349 if all_info[inst].primary_node == name:
5350 live_data.update(result.payload)
5352 wrongnode_inst.add(inst)
5354 # orphan instance; we don't list it here as we don't
5355 # handle this case yet in the output of instance listing
5356 logging.warning("Orphan instance '%s' found on node %s",
5358 # else no instance is alive
5362 if query.IQ_DISKUSAGE in self.requested_data:
5363 gmi = ganeti.masterd.instance
5364 disk_usage = dict((inst.name,
5365 gmi.ComputeDiskSize(inst.disk_template,
5366 [{constants.IDISK_SIZE: disk.size}
5367 for disk in inst.disks]))
5368 for inst in instance_list)
5372 if query.IQ_CONSOLE in self.requested_data:
5374 for inst in instance_list:
5375 if inst.name in live_data:
5376 # Instance is running
5377 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5379 consinfo[inst.name] = None
5380 assert set(consinfo.keys()) == set(instance_names)
5384 if query.IQ_NODES in self.requested_data:
5385 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5387 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5388 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5389 for uuid in set(map(operator.attrgetter("group"),
5395 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5396 disk_usage, offline_nodes, bad_nodes,
5397 live_data, wrongnode_inst, consinfo,
5401 class LUQuery(NoHooksLU):
5402 """Query for resources/items of a certain kind.
5405 # pylint: disable=W0142
5408 def CheckArguments(self):
5409 qcls = _GetQueryImplementation(self.op.what)
5411 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5413 def ExpandNames(self):
5414 self.impl.ExpandNames(self)
5416 def DeclareLocks(self, level):
5417 self.impl.DeclareLocks(self, level)
5419 def Exec(self, feedback_fn):
5420 return self.impl.NewStyleQuery(self)
5423 class LUQueryFields(NoHooksLU):
5424 """Query for resources/items of a certain kind.
5427 # pylint: disable=W0142
5430 def CheckArguments(self):
5431 self.qcls = _GetQueryImplementation(self.op.what)
5433 def ExpandNames(self):
5434 self.needed_locks = {}
5436 def Exec(self, feedback_fn):
5437 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5440 class LUNodeModifyStorage(NoHooksLU):
5441 """Logical unit for modifying a storage volume on a node.
5446 def CheckArguments(self):
5447 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5449 storage_type = self.op.storage_type
5452 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5454 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5455 " modified" % storage_type,
5458 diff = set(self.op.changes.keys()) - modifiable
5460 raise errors.OpPrereqError("The following fields can not be modified for"
5461 " storage units of type '%s': %r" %
5462 (storage_type, list(diff)),
5465 def ExpandNames(self):
5466 self.needed_locks = {
5467 locking.LEVEL_NODE: self.op.node_name,
5470 def Exec(self, feedback_fn):
5471 """Computes the list of nodes and their attributes.
5474 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5475 result = self.rpc.call_storage_modify(self.op.node_name,
5476 self.op.storage_type, st_args,
5477 self.op.name, self.op.changes)
5478 result.Raise("Failed to modify storage unit '%s' on %s" %
5479 (self.op.name, self.op.node_name))
5482 class LUNodeAdd(LogicalUnit):
5483 """Logical unit for adding node to the cluster.
5487 HTYPE = constants.HTYPE_NODE
5488 _NFLAGS = ["master_capable", "vm_capable"]
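# _NFLAGS are the per-node boolean capability flags; on a re-add they are
# taken over from the existing node object unless explicitly given in the
# opcode (see CheckPrereq).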
5490 def CheckArguments(self):
5491 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5492 # validate/normalize the node name
5493 self.hostname = netutils.GetHostname(name=self.op.node_name,
5494 family=self.primary_ip_family)
5495 self.op.node_name = self.hostname.name
5497 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5498 raise errors.OpPrereqError("Cannot readd the master node",
5501 if self.op.readd and self.op.group:
5502 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5503 " being readded", errors.ECODE_INVAL)
5505 def BuildHooksEnv(self):
5508 This will run on all nodes before, and on all nodes + the new node after.
5512 "OP_TARGET": self.op.node_name,
5513 "NODE_NAME": self.op.node_name,
5514 "NODE_PIP": self.op.primary_ip,
5515 "NODE_SIP": self.op.secondary_ip,
5516 "MASTER_CAPABLE": str(self.op.master_capable),
5517 "VM_CAPABLE": str(self.op.vm_capable),
5520 def BuildHooksNodes(self):
5521 """Build hooks nodes.
5524 # Exclude added node
5525 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5526 post_nodes = pre_nodes + [self.op.node_name, ]
5528 return (pre_nodes, post_nodes)
5530 def CheckPrereq(self):
5531 """Check prerequisites.
5534 - the new node is not already in the config
5536 - its parameters (single/dual homed) match the cluster
5538 Any errors are signaled by raising errors.OpPrereqError.
5542 hostname = self.hostname
5543 node = hostname.name
5544 primary_ip = self.op.primary_ip = hostname.ip
5545 if self.op.secondary_ip is None:
5546 if self.primary_ip_family == netutils.IP6Address.family:
5547 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5548 " IPv4 address must be given as secondary",
5550 self.op.secondary_ip = primary_ip
5552 secondary_ip = self.op.secondary_ip
5553 if not netutils.IP4Address.IsValid(secondary_ip):
5554 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5555 " address" % secondary_ip, errors.ECODE_INVAL)
5557 node_list = cfg.GetNodeList()
5558 if not self.op.readd and node in node_list:
5559 raise errors.OpPrereqError("Node %s is already in the configuration" %
5560 node, errors.ECODE_EXISTS)
5561 elif self.op.readd and node not in node_list:
5562 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5565 self.changed_primary_ip = False
5567 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5568 if self.op.readd and node == existing_node_name:
5569 if existing_node.secondary_ip != secondary_ip:
5570 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5571 " address configuration as before",
5573 if existing_node.primary_ip != primary_ip:
5574 self.changed_primary_ip = True
5578 if (existing_node.primary_ip == primary_ip or
5579 existing_node.secondary_ip == primary_ip or
5580 existing_node.primary_ip == secondary_ip or
5581 existing_node.secondary_ip == secondary_ip):
5582 raise errors.OpPrereqError("New node ip address(es) conflict with"
5583 " existing node %s" % existing_node.name,
5584 errors.ECODE_NOTUNIQUE)
5586 # After this 'if' block, None is no longer a valid value for the
5587 # _capable op attributes
5589 old_node = self.cfg.GetNodeInfo(node)
5590 assert old_node is not None, "Can't retrieve locked node %s" % node
5591 for attr in self._NFLAGS:
5592 if getattr(self.op, attr) is None:
5593 setattr(self.op, attr, getattr(old_node, attr))
5595 for attr in self._NFLAGS:
5596 if getattr(self.op, attr) is None:
5597 setattr(self.op, attr, True)
5599 if self.op.readd and not self.op.vm_capable:
5600 pri, sec = cfg.GetNodeInstances(node)
5602 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5603 " flag set to false, but it already holds"
5604 " instances" % node,
5607 # check that the type of the node (single versus dual homed) is the
5608 # same as for the master
5609 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5610 master_singlehomed = myself.secondary_ip == myself.primary_ip
5611 newbie_singlehomed = secondary_ip == primary_ip
5612 if master_singlehomed != newbie_singlehomed:
5613 if master_singlehomed:
5614 raise errors.OpPrereqError("The master has no secondary ip but the"
5615 " new node has one",
5618 raise errors.OpPrereqError("The master has a secondary ip but the"
5619 " new node doesn't have one",
5622 # checks reachability
5623 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5624 raise errors.OpPrereqError("Node not reachable by ping",
5625 errors.ECODE_ENVIRON)
5627 if not newbie_singlehomed:
5628 # check reachability from my secondary ip to newbie's secondary ip
5629 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5630 source=myself.secondary_ip):
5631 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5632 " based ping to node daemon port",
5633 errors.ECODE_ENVIRON)
5640 if self.op.master_capable:
5641 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5643 self.master_candidate = False
5646 self.new_node = old_node
5648 node_group = cfg.LookupNodeGroup(self.op.group)
5649 self.new_node = objects.Node(name=node,
5650 primary_ip=primary_ip,
5651 secondary_ip=secondary_ip,
5652 master_candidate=self.master_candidate,
5653 offline=False, drained=False,
5656 if self.op.ndparams:
5657 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5659 if self.op.hv_state:
5660 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5662 if self.op.disk_state:
5663 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5665 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5666 # it a property on the base class.
5667 result = rpc.DnsOnlyRunner().call_version([node])[node]
5668 result.Raise("Can't get version information from node %s" % node)
5669 if constants.PROTOCOL_VERSION == result.payload:
5670 logging.info("Communication to node %s fine, sw version %s match",
5671 node, result.payload)
5673 raise errors.OpPrereqError("Version mismatch master version %s,"
5674 " node version %s" %
5675 (constants.PROTOCOL_VERSION, result.payload),
5676 errors.ECODE_ENVIRON)
5678 def Exec(self, feedback_fn):
5679 """Adds the new node to the cluster.
5682 new_node = self.new_node
5683 node = new_node.name
5685 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5688     # We are adding a new node, so we assume it's powered
5689 new_node.powered = True
5691 # for re-adds, reset the offline/drained/master-candidate flags;
5692 # we need to reset here, otherwise offline would prevent RPC calls
5693 # later in the procedure; this also means that if the re-add
5694 # fails, we are left with a non-offlined, broken node
5696 new_node.drained = new_node.offline = False # pylint: disable=W0201
5697 self.LogInfo("Readding a node, the offline/drained flags were reset")
5698 # if we demote the node, we do cleanup later in the procedure
5699 new_node.master_candidate = self.master_candidate
5700 if self.changed_primary_ip:
5701 new_node.primary_ip = self.op.primary_ip
5703 # copy the master/vm_capable flags
5704 for attr in self._NFLAGS:
5705 setattr(new_node, attr, getattr(self.op, attr))
5707 # notify the user about any possible mc promotion
5708 if new_node.master_candidate:
5709 self.LogInfo("Node will be a master candidate")
5711 if self.op.ndparams:
5712 new_node.ndparams = self.op.ndparams
5714 new_node.ndparams = {}
5716 if self.op.hv_state:
5717 new_node.hv_state_static = self.new_hv_state
5719 if self.op.disk_state:
5720 new_node.disk_state_static = self.new_disk_state
5722 # Add node to our /etc/hosts, and add key to known_hosts
5723 if self.cfg.GetClusterInfo().modify_etc_hosts:
5724 master_node = self.cfg.GetMasterNode()
5725 result = self.rpc.call_etc_hosts_modify(master_node,
5726 constants.ETC_HOSTS_ADD,
5729 result.Raise("Can't update hosts file with new host data")
5731 if new_node.secondary_ip != new_node.primary_ip:
5732 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5735 node_verify_list = [self.cfg.GetMasterNode()]
5736 node_verify_param = {
5737 constants.NV_NODELIST: ([node], {}),
5738 # TODO: do a node-net-test as well?
5741 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5742 self.cfg.GetClusterName())
5743 for verifier in node_verify_list:
5744 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5745 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5747 for failed in nl_payload:
5748 feedback_fn("ssh/hostname verification failed"
5749 " (checking from %s): %s" %
5750 (verifier, nl_payload[failed]))
5751 raise errors.OpExecError("ssh/hostname verification failed")
5754 _RedistributeAncillaryFiles(self)
5755 self.context.ReaddNode(new_node)
5756 # make sure we redistribute the config
5757 self.cfg.Update(new_node, feedback_fn)
5758 # and make sure the new node will not have old files around
5759 if not new_node.master_candidate:
5760 result = self.rpc.call_node_demote_from_mc(new_node.name)
5761 msg = result.fail_msg
5763 self.LogWarning("Node failed to demote itself from master"
5764 " candidate status: %s" % msg)
5766 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5767 additional_vm=self.op.vm_capable)
5768 self.context.AddNode(new_node, self.proc.GetECId())
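# Illustrative usage sketch (not part of the LU above): node addition is
# normally driven by an OpNodeAdd opcode, typically issued from the command
# line with something like
#   gnt-node add --secondary-ip=192.0.2.11 node4.example.com
# (node name and address are hypothetical); the LU then performs the
# prerequisite checks and the actual join as implemented above.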
5771 class LUNodeSetParams(LogicalUnit):
5772 """Modifies the parameters of a node.
5774 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5775 to the node role (as _ROLE_*)
5776 @cvar _R2F: a dictionary from node role to tuples of flags
5777 @cvar _FLAGS: a list of attribute names corresponding to the flags
5780 HPATH = "node-modify"
5781 HTYPE = constants.HTYPE_NODE
5783 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5785 (True, False, False): _ROLE_CANDIDATE,
5786 (False, True, False): _ROLE_DRAINED,
5787 (False, False, True): _ROLE_OFFLINE,
5788 (False, False, False): _ROLE_REGULAR,
5790 _R2F = dict((v, k) for k, v in _F2R.items())
5791 _FLAGS = ["master_candidate", "drained", "offline"]
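  # For illustration, using the tables above: a node whose flags are
  # (master_candidate=True, drained=False, offline=False) maps to
  # _ROLE_CANDIDATE, and the reverse lookup restores the flag tuple, e.g.
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)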
5793 def CheckArguments(self):
5794 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5795 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5796 self.op.master_capable, self.op.vm_capable,
5797 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5799 if all_mods.count(None) == len(all_mods):
5800 raise errors.OpPrereqError("Please pass at least one modification",
5802 if all_mods.count(True) > 1:
5803 raise errors.OpPrereqError("Can't set the node into more than one"
5804 " state at the same time",
5807 # Boolean value that tells us whether we might be demoting from MC
5808 self.might_demote = (self.op.master_candidate is False or
5809 self.op.offline is True or
5810 self.op.drained is True or
5811 self.op.master_capable is False)
5813 if self.op.secondary_ip:
5814 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5815 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5816 " address" % self.op.secondary_ip,
5819 self.lock_all = self.op.auto_promote and self.might_demote
5820 self.lock_instances = self.op.secondary_ip is not None
5822 def _InstanceFilter(self, instance):
5823 """Filter for getting affected instances.
5826 return (instance.disk_template in constants.DTS_INT_MIRROR and
5827 self.op.node_name in instance.all_nodes)
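  # Illustrative note on the filter above: only instances using an internally
  # mirrored disk template (e.g. DRBD) that have self.op.node_name among their
  # nodes are selected, since only those are affected by a secondary IP change.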
5829 def ExpandNames(self):
5831 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5833 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5835 # Since modifying a node can have severe effects on currently running
5836     # operations, the resource lock is at least acquired in shared mode
5837 self.needed_locks[locking.LEVEL_NODE_RES] = \
5838 self.needed_locks[locking.LEVEL_NODE]
5840 # Get node resource and instance locks in shared mode; they are not used
5841 # for anything but read-only access
5842 self.share_locks[locking.LEVEL_NODE_RES] = 1
5843 self.share_locks[locking.LEVEL_INSTANCE] = 1
5845 if self.lock_instances:
5846 self.needed_locks[locking.LEVEL_INSTANCE] = \
5847 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5849 def BuildHooksEnv(self):
5852 This runs on the master node.
5856 "OP_TARGET": self.op.node_name,
5857 "MASTER_CANDIDATE": str(self.op.master_candidate),
5858 "OFFLINE": str(self.op.offline),
5859 "DRAINED": str(self.op.drained),
5860 "MASTER_CAPABLE": str(self.op.master_capable),
5861 "VM_CAPABLE": str(self.op.vm_capable),
5864 def BuildHooksNodes(self):
5865 """Build hooks nodes.
5868 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5871 def CheckPrereq(self):
5872 """Check prerequisites.
5874 This only checks the instance list against the existing names.
5877 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5879 if self.lock_instances:
5880 affected_instances = \
5881 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5883 # Verify instance locks
5884 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5885 wanted_instances = frozenset(affected_instances.keys())
5886 if wanted_instances - owned_instances:
5887 raise errors.OpPrereqError("Instances affected by changing node %s's"
5888 " secondary IP address have changed since"
5889 " locks were acquired, wanted '%s', have"
5890 " '%s'; retry the operation" %
5892 utils.CommaJoin(wanted_instances),
5893 utils.CommaJoin(owned_instances)),
5896 affected_instances = None
5898 if (self.op.master_candidate is not None or
5899 self.op.drained is not None or
5900 self.op.offline is not None):
5901 # we can't change the master's node flags
5902 if self.op.node_name == self.cfg.GetMasterNode():
5903 raise errors.OpPrereqError("The master role can be changed"
5904 " only via master-failover",
5907 if self.op.master_candidate and not node.master_capable:
5908 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5909 " it a master candidate" % node.name,
5912 if self.op.vm_capable is False:
5913 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5915 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5916 " the vm_capable flag" % node.name,
5919 if node.master_candidate and self.might_demote and not self.lock_all:
5920 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5921       # check if after removing the current node, we're missing master candidates
5923 (mc_remaining, mc_should, _) = \
5924 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5925 if mc_remaining < mc_should:
5926 raise errors.OpPrereqError("Not enough master candidates, please"
5927 " pass auto promote option to allow"
5928 " promotion (--auto-promote or RAPI"
5929 " auto_promote=True)", errors.ECODE_STATE)
5931 self.old_flags = old_flags = (node.master_candidate,
5932 node.drained, node.offline)
5933 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5934 self.old_role = old_role = self._F2R[old_flags]
5936 # Check for ineffective changes
5937 for attr in self._FLAGS:
5938 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5939 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5940 setattr(self.op, attr, None)
5942 # Past this point, any flag change to False means a transition
5943 # away from the respective state, as only real changes are kept
5945 # TODO: We might query the real power state if it supports OOB
5946 if _SupportsOob(self.cfg, node):
5947 if self.op.offline is False and not (node.powered or
5948 self.op.powered is True):
5949 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5950 " offline status can be reset") %
5951 self.op.node_name, errors.ECODE_STATE)
5952 elif self.op.powered is not None:
5953 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5954 " as it does not support out-of-band"
5955 " handling") % self.op.node_name,
5958     # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5959 if (self.op.drained is False or self.op.offline is False or
5960 (self.op.master_capable and not node.master_capable)):
5961 if _DecideSelfPromotion(self):
5962 self.op.master_candidate = True
5963 self.LogInfo("Auto-promoting node to master candidate")
5965 # If we're no longer master capable, we'll demote ourselves from MC
5966 if self.op.master_capable is False and node.master_candidate:
5967 self.LogInfo("Demoting from master candidate")
5968 self.op.master_candidate = False
5971 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5972 if self.op.master_candidate:
5973 new_role = self._ROLE_CANDIDATE
5974 elif self.op.drained:
5975 new_role = self._ROLE_DRAINED
5976 elif self.op.offline:
5977 new_role = self._ROLE_OFFLINE
5978 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5979       # False is still in new flags, which means we're un-setting (the offline/drained/mc) flag
5981 new_role = self._ROLE_REGULAR
5982     else: # no new flags, nothing, keep old role
5983       new_role = old_role
5985 self.new_role = new_role
5987 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5988 # Trying to transition out of offline status
5989 result = self.rpc.call_version([node.name])[node.name]
5991 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5992 " to report its version: %s" %
5993 (node.name, result.fail_msg),
5996 self.LogWarning("Transitioning node from offline to online state"
5997                       " without using re-add. Please make sure the node is healthy")
6000 # When changing the secondary ip, verify if this is a single-homed to
6001 # multi-homed transition or vice versa, and apply the relevant
6003 if self.op.secondary_ip:
6004 # Ok even without locking, because this can't be changed by any LU
6005 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6006 master_singlehomed = master.secondary_ip == master.primary_ip
6007 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6008 if self.op.force and node.name == master.name:
6009 self.LogWarning("Transitioning from single-homed to multi-homed"
6010 " cluster. All nodes will require a secondary ip.")
6012 raise errors.OpPrereqError("Changing the secondary ip on a"
6013 " single-homed cluster requires the"
6014 " --force option to be passed, and the"
6015 " target node to be the master",
6017 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6018 if self.op.force and node.name == master.name:
6019 self.LogWarning("Transitioning from multi-homed to single-homed"
6020 " cluster. Secondary IPs will have to be removed.")
6022 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6023 " same as the primary IP on a multi-homed"
6024 " cluster, unless the --force option is"
6025 " passed, and the target node is the"
6026 " master", errors.ECODE_INVAL)
6028 assert not (frozenset(affected_instances) -
6029 self.owned_locks(locking.LEVEL_INSTANCE))
6032 if affected_instances:
6033 msg = ("Cannot change secondary IP address: offline node has"
6034 " instances (%s) configured to use it" %
6035 utils.CommaJoin(affected_instances.keys()))
6036 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6038 # On online nodes, check that no instances are running, and that
6039 # the node has the new ip and we can reach it.
6040 for instance in affected_instances.values():
6041 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6042 msg="cannot change secondary ip")
6044 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6045 if master.name != node.name:
6046 # check reachability from master secondary ip to new secondary ip
6047 if not netutils.TcpPing(self.op.secondary_ip,
6048 constants.DEFAULT_NODED_PORT,
6049 source=master.secondary_ip):
6050 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6051 " based ping to node daemon port",
6052 errors.ECODE_ENVIRON)
6054 if self.op.ndparams:
6055 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6056 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6057 self.new_ndparams = new_ndparams
6059 if self.op.hv_state:
6060 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6061 self.node.hv_state_static)
6063 if self.op.disk_state:
6064 self.new_disk_state = \
6065 _MergeAndVerifyDiskState(self.op.disk_state,
6066 self.node.disk_state_static)
6068 def Exec(self, feedback_fn):
6073 old_role = self.old_role
6074 new_role = self.new_role
6078 if self.op.ndparams:
6079 node.ndparams = self.new_ndparams
6081 if self.op.powered is not None:
6082 node.powered = self.op.powered
6084 if self.op.hv_state:
6085 node.hv_state_static = self.new_hv_state
6087 if self.op.disk_state:
6088 node.disk_state_static = self.new_disk_state
6090 for attr in ["master_capable", "vm_capable"]:
6091 val = getattr(self.op, attr)
6093 setattr(node, attr, val)
6094 result.append((attr, str(val)))
6096 if new_role != old_role:
6097 # Tell the node to demote itself, if no longer MC and not offline
6098 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6099 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6101 self.LogWarning("Node failed to demote itself: %s", msg)
6103 new_flags = self._R2F[new_role]
6104 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6106 result.append((desc, str(nf)))
6107 (node.master_candidate, node.drained, node.offline) = new_flags
6109 # we locked all nodes, we adjust the CP before updating this node
6111 _AdjustCandidatePool(self, [node.name])
6113 if self.op.secondary_ip:
6114 node.secondary_ip = self.op.secondary_ip
6115 result.append(("secondary_ip", self.op.secondary_ip))
6117 # this will trigger configuration file update, if needed
6118 self.cfg.Update(node, feedback_fn)
6120     # this will trigger job queue propagation or cleanup if the mc flag changed
6122 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6123 self.context.ReaddNode(node)
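# Illustrative usage sketch: the flag and parameter changes handled above are
# normally requested via OpNodeSetParams, e.g. from the command line with
# something like
#   gnt-node modify --offline=yes node2.example.com
# (hypothetical node name); CheckPrereq then derives the old/new role and Exec
# applies the resulting flag transition.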
6128 class LUNodePowercycle(NoHooksLU):
6129 """Powercycles a node.
6134 def CheckArguments(self):
6135 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6136 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6137 raise errors.OpPrereqError("The node is the master and the force"
6138 " parameter was not set",
6141 def ExpandNames(self):
6142 """Locking for PowercycleNode.
6144 This is a last-resort option and shouldn't block on other
6145 jobs. Therefore, we grab no locks.
6148 self.needed_locks = {}
6150 def Exec(self, feedback_fn):
6154 result = self.rpc.call_node_powercycle(self.op.node_name,
6155 self.cfg.GetHypervisorType())
6156 result.Raise("Failed to schedule the reboot")
6157 return result.payload
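# Illustrative note: a powercycle is normally requested via OpNodePowercycle
# (roughly "gnt-node powercycle <node>" on the CLI); since the LU takes no
# locks, it remains usable as a last resort even when the node is stuck, at
# the price of not being serialized against other jobs.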
6160 class LUClusterQuery(NoHooksLU):
6161 """Query cluster configuration.
6166 def ExpandNames(self):
6167 self.needed_locks = {}
6169 def Exec(self, feedback_fn):
6170 """Return cluster config.
6173 cluster = self.cfg.GetClusterInfo()
6176 # Filter just for enabled hypervisors
6177 for os_name, hv_dict in cluster.os_hvp.items():
6178 os_hvp[os_name] = {}
6179 for hv_name, hv_params in hv_dict.items():
6180 if hv_name in cluster.enabled_hypervisors:
6181 os_hvp[os_name][hv_name] = hv_params
6183 # Convert ip_family to ip_version
6184 primary_ip_version = constants.IP4_VERSION
6185 if cluster.primary_ip_family == netutils.IP6Address.family:
6186 primary_ip_version = constants.IP6_VERSION
6189 "software_version": constants.RELEASE_VERSION,
6190 "protocol_version": constants.PROTOCOL_VERSION,
6191 "config_version": constants.CONFIG_VERSION,
6192 "os_api_version": max(constants.OS_API_VERSIONS),
6193 "export_version": constants.EXPORT_VERSION,
6194 "architecture": runtime.GetArchInfo(),
6195 "name": cluster.cluster_name,
6196 "master": cluster.master_node,
6197 "default_hypervisor": cluster.primary_hypervisor,
6198 "enabled_hypervisors": cluster.enabled_hypervisors,
6199 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6200 for hypervisor_name in cluster.enabled_hypervisors]),
6202 "beparams": cluster.beparams,
6203 "osparams": cluster.osparams,
6204 "ipolicy": cluster.ipolicy,
6205 "nicparams": cluster.nicparams,
6206 "ndparams": cluster.ndparams,
6207 "diskparams": cluster.diskparams,
6208 "candidate_pool_size": cluster.candidate_pool_size,
6209 "master_netdev": cluster.master_netdev,
6210 "master_netmask": cluster.master_netmask,
6211 "use_external_mip_script": cluster.use_external_mip_script,
6212 "volume_group_name": cluster.volume_group_name,
6213 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6214 "file_storage_dir": cluster.file_storage_dir,
6215 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6216 "maintain_node_health": cluster.maintain_node_health,
6217 "ctime": cluster.ctime,
6218 "mtime": cluster.mtime,
6219 "uuid": cluster.uuid,
6220 "tags": list(cluster.GetTags()),
6221 "uid_pool": cluster.uid_pool,
6222 "default_iallocator": cluster.default_iallocator,
6223 "reserved_lvs": cluster.reserved_lvs,
6224 "primary_ip_version": primary_ip_version,
6225 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6226 "hidden_os": cluster.hidden_os,
6227 "blacklisted_os": cluster.blacklisted_os,
6233 class LUClusterConfigQuery(NoHooksLU):
6234 """Return configuration values.
6239 def CheckArguments(self):
6240 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6242 def ExpandNames(self):
6243 self.cq.ExpandNames(self)
6245 def DeclareLocks(self, level):
6246 self.cq.DeclareLocks(self, level)
6248 def Exec(self, feedback_fn):
6249 result = self.cq.OldStyleQuery(self)
6251 assert len(result) == 1
6256 class _ClusterQuery(_QueryBase):
6257 FIELDS = query.CLUSTER_FIELDS
6259 #: Do not sort (there is only one item)
6262 def ExpandNames(self, lu):
6263 lu.needed_locks = {}
6265 # The following variables interact with _QueryBase._GetNames
6266 self.wanted = locking.ALL_SET
6267 self.do_locking = self.use_locking
6270 raise errors.OpPrereqError("Can not use locking for cluster queries",
6273 def DeclareLocks(self, lu, level):
6276 def _GetQueryData(self, lu):
6277     """Computes the cluster data.
6280 # Locking is not used
6281 assert not (compat.any(lu.glm.is_owned(level)
6282 for level in locking.LEVELS
6283 if level != locking.LEVEL_CLUSTER) or
6284 self.do_locking or self.use_locking)
6286 if query.CQ_CONFIG in self.requested_data:
6287 cluster = lu.cfg.GetClusterInfo()
6289 cluster = NotImplemented
6291 if query.CQ_QUEUE_DRAINED in self.requested_data:
6292 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6294 drain_flag = NotImplemented
6296 if query.CQ_WATCHER_PAUSE in self.requested_data:
6297 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6299 watcher_pause = NotImplemented
6301 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
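# Illustrative note: which of the three items above are actually collected
# depends on self.requested_data, which is derived from the requested fields
# (query.CLUSTER_FIELDS); e.g. a query asking only for the queue drain flag
# would read neither the cluster config nor the watcher pause file.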
6304 class LUInstanceActivateDisks(NoHooksLU):
6305 """Bring up an instance's disks.
6310 def ExpandNames(self):
6311 self._ExpandAndLockInstance()
6312 self.needed_locks[locking.LEVEL_NODE] = []
6313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6315 def DeclareLocks(self, level):
6316 if level == locking.LEVEL_NODE:
6317 self._LockInstancesNodes()
6319 def CheckPrereq(self):
6320 """Check prerequisites.
6322 This checks that the instance is in the cluster.
6325 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6326 assert self.instance is not None, \
6327 "Cannot retrieve locked instance %s" % self.op.instance_name
6328 _CheckNodeOnline(self, self.instance.primary_node)
6330 def Exec(self, feedback_fn):
6331 """Activate the disks.
6334 disks_ok, disks_info = \
6335 _AssembleInstanceDisks(self, self.instance,
6336 ignore_size=self.op.ignore_size)
6338 raise errors.OpExecError("Cannot activate block devices")
6340 if self.op.wait_for_sync:
6341 if not _WaitForSync(self, self.instance):
6342 raise errors.OpExecError("Some disks of the instance are degraded!")
6347 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6349 """Prepare the block devices for an instance.
6351 This sets up the block devices on all nodes.
6353 @type lu: L{LogicalUnit}
6354 @param lu: the logical unit on whose behalf we execute
6355 @type instance: L{objects.Instance}
6356 @param instance: the instance for whose disks we assemble
6357 @type disks: list of L{objects.Disk} or None
6358 @param disks: which disks to assemble (or all, if None)
6359 @type ignore_secondaries: boolean
6360 @param ignore_secondaries: if true, errors on secondary nodes
6361 won't result in an error return from the function
6362 @type ignore_size: boolean
6363 @param ignore_size: if true, the current known size of the disk
6364 will not be used during the disk activation, useful for cases
6365 when the size is wrong
6366 @return: False if the operation failed, otherwise a list of
6367 (host, instance_visible_name, node_visible_name)
6368 with the mapping from node devices to instance devices
6373 iname = instance.name
6374 disks = _ExpandCheckDisks(instance, disks)
6376   # With the two-pass mechanism we try to reduce the window of
6377   # opportunity for the race condition of switching DRBD to primary
6378   # before handshaking has occurred, but we do not eliminate it
6380 # The proper fix would be to wait (with some limits) until the
6381 # connection has been made and drbd transitions from WFConnection
6382   # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6385 # 1st pass, assemble on all nodes in secondary mode
6386 for idx, inst_disk in enumerate(disks):
6387 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6389 node_disk = node_disk.Copy()
6390 node_disk.UnsetSize()
6391 lu.cfg.SetDiskID(node_disk, node)
6392 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6394 msg = result.fail_msg
6396 is_offline_secondary = (node in instance.secondary_nodes and
6398 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6399 " (is_primary=False, pass=1): %s",
6400 inst_disk.iv_name, node, msg)
6401 if not (ignore_secondaries or is_offline_secondary):
6404 # FIXME: race condition on drbd migration to primary
6406 # 2nd pass, do only the primary node
6407 for idx, inst_disk in enumerate(disks):
6410 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6411 if node != instance.primary_node:
6414 node_disk = node_disk.Copy()
6415 node_disk.UnsetSize()
6416 lu.cfg.SetDiskID(node_disk, node)
6417 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6419 msg = result.fail_msg
6421 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6422 " (is_primary=True, pass=2): %s",
6423 inst_disk.iv_name, node, msg)
6426 dev_path = result.payload
6428 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6430   # leave the disks configured for the primary node;
6431   # this is a workaround that would be better fixed by
6432   # improving the logical/physical id handling
6434 lu.cfg.SetDiskID(disk, instance.primary_node)
6436 return disks_ok, device_info
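# Minimal usage sketch for the helper above (assuming an initialized LU "lu"
# and an instance object "inst", both hypothetical here):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, inst)
#   if disks_ok:
#     for node, iv_name, dev_path in device_info:
#       pass  # dev_path is the node-local device backing the disk iv_name
# mirroring the (host, instance_visible_name, node_visible_name) tuples
# documented above.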
6439 def _StartInstanceDisks(lu, instance, force):
6440 """Start the disks of an instance.
6443 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6444 ignore_secondaries=force)
6446 _ShutdownInstanceDisks(lu, instance)
6447 if force is not None and not force:
6448 lu.proc.LogWarning("", hint="If the message above refers to a"
6450                        " secondary node, you can retry the operation using '--force'.")
6451 raise errors.OpExecError("Disk consistency error")
6454 class LUInstanceDeactivateDisks(NoHooksLU):
6455 """Shutdown an instance's disks.
6460 def ExpandNames(self):
6461 self._ExpandAndLockInstance()
6462 self.needed_locks[locking.LEVEL_NODE] = []
6463 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6465 def DeclareLocks(self, level):
6466 if level == locking.LEVEL_NODE:
6467 self._LockInstancesNodes()
6469 def CheckPrereq(self):
6470 """Check prerequisites.
6472 This checks that the instance is in the cluster.
6475 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6476 assert self.instance is not None, \
6477 "Cannot retrieve locked instance %s" % self.op.instance_name
6479 def Exec(self, feedback_fn):
6480 """Deactivate the disks
6483 instance = self.instance
6485 _ShutdownInstanceDisks(self, instance)
6487 _SafeShutdownInstanceDisks(self, instance)
6490 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6491 """Shutdown block devices of an instance.
6493 This function checks if an instance is running, before calling
6494 _ShutdownInstanceDisks.
6497 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6498 _ShutdownInstanceDisks(lu, instance, disks=disks)
6501 def _ExpandCheckDisks(instance, disks):
6502 """Return the instance disks selected by the disks list
6504 @type disks: list of L{objects.Disk} or None
6505 @param disks: selected disks
6506 @rtype: list of L{objects.Disk}
6507 @return: selected instance disks to act on
6511 return instance.disks
6513 if not set(disks).issubset(instance.disks):
6514 raise errors.ProgrammerError("Can only act on disks belonging to the"
6519 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6520 """Shutdown block devices of an instance.
6522 This does the shutdown on all nodes of the instance.
6524 If the ignore_primary is false, errors on the primary node are
6529 disks = _ExpandCheckDisks(instance, disks)
6532 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6533 lu.cfg.SetDiskID(top_disk, node)
6534 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6535 msg = result.fail_msg
6537 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6538 disk.iv_name, node, msg)
6539 if ((node == instance.primary_node and not ignore_primary) or
6540 (node != instance.primary_node and not result.offline)):
6545 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6546 """Checks if a node has enough free memory.
6548   This function checks if a given node has the needed amount of free
6549   memory. In case the node has less memory or we cannot get the
6550   information from the node, this function raises an OpPrereqError exception.
6553 @type lu: C{LogicalUnit}
6554 @param lu: a logical unit from which we get configuration data
6556 @param node: the node to check
6557 @type reason: C{str}
6558 @param reason: string to use in the error message
6559 @type requested: C{int}
6560 @param requested: the amount of memory in MiB to check for
6561 @type hypervisor_name: C{str}
6562 @param hypervisor_name: the hypervisor to ask for memory stats
6564 @return: node current free memory
6565 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6566 we cannot check the node
6569 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6570 nodeinfo[node].Raise("Can't get data from node %s" % node,
6571 prereq=True, ecode=errors.ECODE_ENVIRON)
6572 (_, _, (hv_info, )) = nodeinfo[node].payload
6574 free_mem = hv_info.get("memory_free", None)
6575 if not isinstance(free_mem, int):
6576 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6577 " was '%s'" % (node, free_mem),
6578 errors.ECODE_ENVIRON)
6579 if requested > free_mem:
6580 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6581 " needed %s MiB, available %s MiB" %
6582 (node, reason, requested, free_mem),
6587 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6588   """Checks if nodes have enough free disk space in all the VGs.
6590   This function checks if all given nodes have the needed amount of
6591   free disk. In case any node has less disk or we cannot get the
6592   information from the node, this function raises an OpPrereqError exception.
6595 @type lu: C{LogicalUnit}
6596 @param lu: a logical unit from which we get configuration data
6597 @type nodenames: C{list}
6598 @param nodenames: the list of node names to check
6599 @type req_sizes: C{dict}
6600   @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
6602 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6603 or we cannot check the node
6606 for vg, req_size in req_sizes.items():
6607 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
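# Example of the req_sizes argument (hypothetical values): passing
#   {"xenvg": 10240}
# requires every node in nodenames to have at least 10240 MiB free in the
# volume group "xenvg"; each entry is checked via _CheckNodesFreeDiskOnVG.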
6610 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6611 """Checks if nodes have enough free disk space in the specified VG.
6613   This function checks if all given nodes have the needed amount of
6614   free disk. In case any node has less disk or we cannot get the
6615   information from the node, this function raises an OpPrereqError exception.
6618 @type lu: C{LogicalUnit}
6619 @param lu: a logical unit from which we get configuration data
6620 @type nodenames: C{list}
6621 @param nodenames: the list of node names to check
6623 @param vg: the volume group to check
6624 @type requested: C{int}
6625 @param requested: the amount of disk in MiB to check for
6626 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6627 or we cannot check the node
6630 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6631 for node in nodenames:
6632 info = nodeinfo[node]
6633 info.Raise("Cannot get current information from node %s" % node,
6634 prereq=True, ecode=errors.ECODE_ENVIRON)
6635 (_, (vg_info, ), _) = info.payload
6636 vg_free = vg_info.get("vg_free", None)
6637 if not isinstance(vg_free, int):
6638 raise errors.OpPrereqError("Can't compute free disk space on node"
6639 " %s for vg %s, result was '%s'" %
6640 (node, vg, vg_free), errors.ECODE_ENVIRON)
6641 if requested > vg_free:
6642 raise errors.OpPrereqError("Not enough disk space on target node %s"
6643 " vg %s: required %d MiB, available %d MiB" %
6644 (node, vg, requested, vg_free),
6648 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6649   """Checks if nodes have enough physical CPUs.
6651   This function checks if all given nodes have the needed number of
6652   physical CPUs. In case any node has fewer CPUs or we cannot get the
6653   information from the node, this function raises an OpPrereqError exception.
6656 @type lu: C{LogicalUnit}
6657 @param lu: a logical unit from which we get configuration data
6658 @type nodenames: C{list}
6659 @param nodenames: the list of node names to check
6660 @type requested: C{int}
6661 @param requested: the minimum acceptable number of physical CPUs
6662 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6663 or we cannot check the node
6666 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6667 for node in nodenames:
6668 info = nodeinfo[node]
6669 info.Raise("Cannot get current information from node %s" % node,
6670 prereq=True, ecode=errors.ECODE_ENVIRON)
6671 (_, _, (hv_info, )) = info.payload
6672 num_cpus = hv_info.get("cpu_total", None)
6673 if not isinstance(num_cpus, int):
6674 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6675 " on node %s, result was '%s'" %
6676 (node, num_cpus), errors.ECODE_ENVIRON)
6677 if requested > num_cpus:
6678 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6679 "required" % (node, num_cpus, requested),
6683 class LUInstanceStartup(LogicalUnit):
6684 """Starts an instance.
6687 HPATH = "instance-start"
6688 HTYPE = constants.HTYPE_INSTANCE
6691 def CheckArguments(self):
6693 if self.op.beparams:
6694 # fill the beparams dict
6695 objects.UpgradeBeParams(self.op.beparams)
6696 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6698 def ExpandNames(self):
6699 self._ExpandAndLockInstance()
6700 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6702 def DeclareLocks(self, level):
6703 if level == locking.LEVEL_NODE_RES:
6704 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6706 def BuildHooksEnv(self):
6709 This runs on master, primary and secondary nodes of the instance.
6713 "FORCE": self.op.force,
6716 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6720 def BuildHooksNodes(self):
6721 """Build hooks nodes.
6724 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6727 def CheckPrereq(self):
6728 """Check prerequisites.
6730 This checks that the instance is in the cluster.
6733 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6734 assert self.instance is not None, \
6735 "Cannot retrieve locked instance %s" % self.op.instance_name
6738 if self.op.hvparams:
6739 # check hypervisor parameter syntax (locally)
6740 cluster = self.cfg.GetClusterInfo()
6741 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6742 filled_hvp = cluster.FillHV(instance)
6743 filled_hvp.update(self.op.hvparams)
6744 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6745 hv_type.CheckParameterSyntax(filled_hvp)
6746 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6748 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6750 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6752 if self.primary_offline and self.op.ignore_offline_nodes:
6753 self.proc.LogWarning("Ignoring offline primary node")
6755 if self.op.hvparams or self.op.beparams:
6756 self.proc.LogWarning("Overridden parameters are ignored")
6758 _CheckNodeOnline(self, instance.primary_node)
6760 bep = self.cfg.GetClusterInfo().FillBE(instance)
6761 bep.update(self.op.beparams)
6763 # check bridges existence
6764 _CheckInstanceBridgesExist(self, instance)
6766 remote_info = self.rpc.call_instance_info(instance.primary_node,
6768 instance.hypervisor)
6769 remote_info.Raise("Error checking node %s" % instance.primary_node,
6770 prereq=True, ecode=errors.ECODE_ENVIRON)
6771 if not remote_info.payload: # not running already
6772 _CheckNodeFreeMemory(self, instance.primary_node,
6773 "starting instance %s" % instance.name,
6774 bep[constants.BE_MINMEM], instance.hypervisor)
6776 def Exec(self, feedback_fn):
6777 """Start the instance.
6780 instance = self.instance
6781 force = self.op.force
6783 if not self.op.no_remember:
6784 self.cfg.MarkInstanceUp(instance.name)
6786 if self.primary_offline:
6787 assert self.op.ignore_offline_nodes
6788 self.proc.LogInfo("Primary node offline, marked instance as started")
6790 node_current = instance.primary_node
6792 _StartInstanceDisks(self, instance, force)
6795 self.rpc.call_instance_start(node_current,
6796 (instance, self.op.hvparams,
6798 self.op.startup_paused)
6799 msg = result.fail_msg
6801 _ShutdownInstanceDisks(self, instance)
6802 raise errors.OpExecError("Could not start instance: %s" % msg)
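# Illustrative note: startup is normally requested via OpInstanceStartup
# (roughly "gnt-instance startup <instance>" on the CLI); hvparams/beparams
# given in the opcode are only used for this particular start and are not
# written back to the instance configuration.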
6805 class LUInstanceReboot(LogicalUnit):
6806 """Reboot an instance.
6809 HPATH = "instance-reboot"
6810 HTYPE = constants.HTYPE_INSTANCE
6813 def ExpandNames(self):
6814 self._ExpandAndLockInstance()
6816 def BuildHooksEnv(self):
6819 This runs on master, primary and secondary nodes of the instance.
6823 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6824 "REBOOT_TYPE": self.op.reboot_type,
6825 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6828 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6832 def BuildHooksNodes(self):
6833 """Build hooks nodes.
6836 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6839 def CheckPrereq(self):
6840 """Check prerequisites.
6842 This checks that the instance is in the cluster.
6845 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6846 assert self.instance is not None, \
6847 "Cannot retrieve locked instance %s" % self.op.instance_name
6848 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6849 _CheckNodeOnline(self, instance.primary_node)
6851 # check bridges existence
6852 _CheckInstanceBridgesExist(self, instance)
6854 def Exec(self, feedback_fn):
6855 """Reboot the instance.
6858 instance = self.instance
6859 ignore_secondaries = self.op.ignore_secondaries
6860 reboot_type = self.op.reboot_type
6862 remote_info = self.rpc.call_instance_info(instance.primary_node,
6864 instance.hypervisor)
6865 remote_info.Raise("Error checking node %s" % instance.primary_node)
6866 instance_running = bool(remote_info.payload)
6868 node_current = instance.primary_node
6870 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6871 constants.INSTANCE_REBOOT_HARD]:
6872 for disk in instance.disks:
6873 self.cfg.SetDiskID(disk, node_current)
6874 result = self.rpc.call_instance_reboot(node_current, instance,
6876 self.op.shutdown_timeout)
6877 result.Raise("Could not reboot instance")
6879 if instance_running:
6880 result = self.rpc.call_instance_shutdown(node_current, instance,
6881 self.op.shutdown_timeout)
6882 result.Raise("Could not shutdown instance for full reboot")
6883 _ShutdownInstanceDisks(self, instance)
6885 self.LogInfo("Instance %s was already stopped, starting now",
6887 _StartInstanceDisks(self, instance, ignore_secondaries)
6888 result = self.rpc.call_instance_start(node_current,
6889 (instance, None, None), False)
6890 msg = result.fail_msg
6892 _ShutdownInstanceDisks(self, instance)
6893 raise errors.OpExecError("Could not start instance for"
6894 " full reboot: %s" % msg)
6896 self.cfg.MarkInstanceUp(instance.name)
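# Illustrative note on the reboot types handled above: soft and hard reboots
# are delegated to the hypervisor via call_instance_reboot, while any other
# (full) reboot is emulated by a clean shutdown of the instance and its disks
# followed by a fresh disk assembly and instance start.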
6899 class LUInstanceShutdown(LogicalUnit):
6900 """Shutdown an instance.
6903 HPATH = "instance-stop"
6904 HTYPE = constants.HTYPE_INSTANCE
6907 def ExpandNames(self):
6908 self._ExpandAndLockInstance()
6910 def BuildHooksEnv(self):
6913 This runs on master, primary and secondary nodes of the instance.
6916 env = _BuildInstanceHookEnvByObject(self, self.instance)
6917 env["TIMEOUT"] = self.op.timeout
6920 def BuildHooksNodes(self):
6921 """Build hooks nodes.
6924 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6927 def CheckPrereq(self):
6928 """Check prerequisites.
6930 This checks that the instance is in the cluster.
6933 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6934 assert self.instance is not None, \
6935 "Cannot retrieve locked instance %s" % self.op.instance_name
6937 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6939 self.primary_offline = \
6940 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6942 if self.primary_offline and self.op.ignore_offline_nodes:
6943 self.proc.LogWarning("Ignoring offline primary node")
6945 _CheckNodeOnline(self, self.instance.primary_node)
6947 def Exec(self, feedback_fn):
6948 """Shutdown the instance.
6951 instance = self.instance
6952 node_current = instance.primary_node
6953 timeout = self.op.timeout
6955 if not self.op.no_remember:
6956 self.cfg.MarkInstanceDown(instance.name)
6958 if self.primary_offline:
6959 assert self.op.ignore_offline_nodes
6960 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6962 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6963 msg = result.fail_msg
6965 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6967 _ShutdownInstanceDisks(self, instance)
6970 class LUInstanceReinstall(LogicalUnit):
6971 """Reinstall an instance.
6974 HPATH = "instance-reinstall"
6975 HTYPE = constants.HTYPE_INSTANCE
6978 def ExpandNames(self):
6979 self._ExpandAndLockInstance()
6981 def BuildHooksEnv(self):
6984 This runs on master, primary and secondary nodes of the instance.
6987 return _BuildInstanceHookEnvByObject(self, self.instance)
6989 def BuildHooksNodes(self):
6990 """Build hooks nodes.
6993 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6996 def CheckPrereq(self):
6997 """Check prerequisites.
6999 This checks that the instance is in the cluster and is not running.
7002 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7003 assert instance is not None, \
7004 "Cannot retrieve locked instance %s" % self.op.instance_name
7005 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7006 " offline, cannot reinstall")
7008 if instance.disk_template == constants.DT_DISKLESS:
7009 raise errors.OpPrereqError("Instance '%s' has no disks" %
7010 self.op.instance_name,
7012 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7014 if self.op.os_type is not None:
7016 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7017 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7018 instance_os = self.op.os_type
7020 instance_os = instance.os
7022 nodelist = list(instance.all_nodes)
7024 if self.op.osparams:
7025 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7026 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7027 self.os_inst = i_osdict # the new dict (without defaults)
7031 self.instance = instance
7033 def Exec(self, feedback_fn):
7034 """Reinstall the instance.
7037 inst = self.instance
7039 if self.op.os_type is not None:
7040 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7041 inst.os = self.op.os_type
7042 # Write to configuration
7043 self.cfg.Update(inst, feedback_fn)
7045 _StartInstanceDisks(self, inst, None)
7047 feedback_fn("Running the instance OS create scripts...")
7048 # FIXME: pass debug option from opcode to backend
7049 result = self.rpc.call_instance_os_add(inst.primary_node,
7050 (inst, self.os_inst), True,
7051 self.op.debug_level)
7052 result.Raise("Could not install OS for instance %s on node %s" %
7053 (inst.name, inst.primary_node))
7055 _ShutdownInstanceDisks(self, inst)
7058 class LUInstanceRecreateDisks(LogicalUnit):
7059 """Recreate an instance's missing disks.
7062 HPATH = "instance-recreate-disks"
7063 HTYPE = constants.HTYPE_INSTANCE
7066 _MODIFYABLE = frozenset([
7067 constants.IDISK_SIZE,
7068 constants.IDISK_MODE,
7071 # New or changed disk parameters may have different semantics
7072 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7073 constants.IDISK_ADOPT,
7075     # TODO: Implement support for changing VG while recreating
7077 constants.IDISK_METAVG,
7080 def _RunAllocator(self):
7081 """Run the allocator based on input opcode.
7084 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7087 # The allocator should actually run in "relocate" mode, but current
7088 # allocators don't support relocating all the nodes of an instance at
7089 # the same time. As a workaround we use "allocate" mode, but this is
7090 # suboptimal for two reasons:
7091 # - The instance name passed to the allocator is present in the list of
7092 # existing instances, so there could be a conflict within the
7093 # internal structures of the allocator. This doesn't happen with the
7094 # current allocators, but it's a liability.
7095 # - The allocator counts the resources used by the instance twice: once
7096 # because the instance exists already, and once because it tries to
7097 # allocate a new instance.
7098 # The allocator could choose some of the nodes on which the instance is
7099 # running, but that's not a problem. If the instance nodes are broken,
7100     #   they should already be marked as drained or offline, and hence
7101 # skipped by the allocator. If instance disks have been lost for other
7102 # reasons, then recreating the disks on the same nodes should be fine.
7103 disk_template = self.instance.disk_template
7104 spindle_use = be_full[constants.BE_SPINDLE_USE]
7105 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7106 disk_template=disk_template,
7107 tags=list(self.instance.GetTags()),
7108 os=self.instance.os,
7110 vcpus=be_full[constants.BE_VCPUS],
7111 memory=be_full[constants.BE_MAXMEM],
7112 spindle_use=spindle_use,
7113 disks=[{constants.IDISK_SIZE: d.size,
7114 constants.IDISK_MODE: d.mode}
7115 for d in self.instance.disks],
7116 hypervisor=self.instance.hypervisor)
7117 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7119 ial.Run(self.op.iallocator)
7121 assert req.RequiredNodes() == len(self.instance.all_nodes)
7124 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7125 " %s" % (self.op.iallocator, ial.info),
7128 self.op.nodes = ial.result
7129 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7130 self.op.instance_name, self.op.iallocator,
7131 utils.CommaJoin(ial.result))
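  # Illustrative note: on success, ial.result above is simply the list of node
  # names chosen by the allocator (e.g. ["node1.example.com",
  # "node2.example.com"] for a DRBD instance; names hypothetical), which then
  # becomes self.op.nodes for the disk re-creation.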
7133 def CheckArguments(self):
7134 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7135 # Normalize and convert deprecated list of disk indices
7136 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7138 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7140 raise errors.OpPrereqError("Some disks have been specified more than"
7141 " once: %s" % utils.CommaJoin(duplicates),
7144 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7145 # when neither iallocator nor nodes are specified
7146 if self.op.iallocator or self.op.nodes:
7147 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7149 for (idx, params) in self.op.disks:
7150 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7151 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7153 raise errors.OpPrereqError("Parameters for disk %s try to change"
7154                                    " unmodifiable parameter(s): %s" %
7155 (idx, utils.CommaJoin(unsupported)),
7158 def ExpandNames(self):
7159 self._ExpandAndLockInstance()
7160 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7162 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7163 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7165 self.needed_locks[locking.LEVEL_NODE] = []
7166 if self.op.iallocator:
7167 # iallocator will select a new node in the same group
7168 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7169 self.needed_locks[locking.LEVEL_NODE_RES] = []
7171 def DeclareLocks(self, level):
7172 if level == locking.LEVEL_NODEGROUP:
7173 assert self.op.iallocator is not None
7174 assert not self.op.nodes
7175 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7176 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7177 # Lock the primary group used by the instance optimistically; this
7178 # requires going via the node before it's locked, requiring
7179 # verification later on
7180 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7181 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7183 elif level == locking.LEVEL_NODE:
7184 # If an allocator is used, then we lock all the nodes in the current
7185 # instance group, as we don't know yet which ones will be selected;
7186 # if we replace the nodes without using an allocator, locks are
7187 # already declared in ExpandNames; otherwise, we need to lock all the
7188 # instance nodes for disk re-creation
7189 if self.op.iallocator:
7190 assert not self.op.nodes
7191 assert not self.needed_locks[locking.LEVEL_NODE]
7192 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7194 # Lock member nodes of the group of the primary node
7195 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7196 self.needed_locks[locking.LEVEL_NODE].extend(
7197 self.cfg.GetNodeGroup(group_uuid).members)
7198 elif not self.op.nodes:
7199 self._LockInstancesNodes(primary_only=False)
7200 elif level == locking.LEVEL_NODE_RES:
7202 self.needed_locks[locking.LEVEL_NODE_RES] = \
7203 self.needed_locks[locking.LEVEL_NODE][:]
7205 def BuildHooksEnv(self):
7208 This runs on master, primary and secondary nodes of the instance.
7211 return _BuildInstanceHookEnvByObject(self, self.instance)
7213 def BuildHooksNodes(self):
7214 """Build hooks nodes.
7217 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7220 def CheckPrereq(self):
7221 """Check prerequisites.
7223 This checks that the instance is in the cluster and is not running.
7226 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7227 assert instance is not None, \
7228 "Cannot retrieve locked instance %s" % self.op.instance_name
7230 if len(self.op.nodes) != len(instance.all_nodes):
7231 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7232 " %d replacement nodes were specified" %
7233 (instance.name, len(instance.all_nodes),
7234 len(self.op.nodes)),
7236 assert instance.disk_template != constants.DT_DRBD8 or \
7237 len(self.op.nodes) == 2
7238 assert instance.disk_template != constants.DT_PLAIN or \
7239 len(self.op.nodes) == 1
7240 primary_node = self.op.nodes[0]
7242 primary_node = instance.primary_node
7243 if not self.op.iallocator:
7244 _CheckNodeOnline(self, primary_node)
7246 if instance.disk_template == constants.DT_DISKLESS:
7247 raise errors.OpPrereqError("Instance '%s' has no disks" %
7248 self.op.instance_name, errors.ECODE_INVAL)
7250 # Verify if node group locks are still correct
7251 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7253 # Node group locks are acquired only for the primary node (and only
7254 # when the allocator is used)
7255 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7258 # if we replace nodes *and* the old primary is offline, we don't
7259 # check the instance state
7260 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7261 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7262 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7263 msg="cannot recreate disks")
7266 self.disks = dict(self.op.disks)
7268 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7270 maxidx = max(self.disks.keys())
7271 if maxidx >= len(instance.disks):
7272 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7275 if ((self.op.nodes or self.op.iallocator) and
7276 sorted(self.disks.keys()) != range(len(instance.disks))):
7277 raise errors.OpPrereqError("Can't recreate disks partially and"
7278 " change the nodes at the same time",
7281 self.instance = instance
7283 if self.op.iallocator:
7284 self._RunAllocator()
7285 # Release unneeded node and node resource locks
7286 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7287 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7289 def Exec(self, feedback_fn):
7290 """Recreate the disks.
7293 instance = self.instance
7295 assert (self.owned_locks(locking.LEVEL_NODE) ==
7296 self.owned_locks(locking.LEVEL_NODE_RES))
7299 mods = [] # keeps track of needed changes
7301 for idx, disk in enumerate(instance.disks):
7303 changes = self.disks[idx]
7305 # Disk should not be recreated
7309 # update secondaries for disks, if needed
7310 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7311 # need to update the nodes and minors
7312 assert len(self.op.nodes) == 2
7313 assert len(disk.logical_id) == 6 # otherwise disk internals
7315 (_, _, old_port, _, _, old_secret) = disk.logical_id
7316 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7317 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7318 new_minors[0], new_minors[1], old_secret)
7319 assert len(disk.logical_id) == len(new_id)
7323 mods.append((idx, new_id, changes))
7325 # now that we have passed all asserts above, we can apply the mods
7326 # in a single run (to avoid partial changes)
7327 for idx, new_id, changes in mods:
7328 disk = instance.disks[idx]
7329 if new_id is not None:
7330 assert disk.dev_type == constants.LD_DRBD8
7331 disk.logical_id = new_id
7333 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7334 mode=changes.get(constants.IDISK_MODE, None))
7336 # change primary node, if needed
7338 instance.primary_node = self.op.nodes[0]
7339 self.LogWarning("Changing the instance's nodes, you will have to"
7340 " remove any disks left on the older nodes manually")
7343 self.cfg.Update(instance, feedback_fn)
7345 # All touched nodes must be locked
7346 mylocks = self.owned_locks(locking.LEVEL_NODE)
7347 assert mylocks.issuperset(frozenset(instance.all_nodes))
7348 _CreateDisks(self, instance, to_skip=to_skip)
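# Illustrative usage sketch: disk re-creation is normally requested via
# OpInstanceRecreateDisks, either for the current nodes or, as handled above,
# together with a replacement node list or an iallocator, e.g. something like
#   opcodes.OpInstanceRecreateDisks(instance_name="inst1.example.com",
#                                   nodes=["node1", "node2"])
# (names hypothetical).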
7351 class LUInstanceRename(LogicalUnit):
7352 """Rename an instance.
7355 HPATH = "instance-rename"
7356 HTYPE = constants.HTYPE_INSTANCE
7358 def CheckArguments(self):
7362 if self.op.ip_check and not self.op.name_check:
7363 # TODO: make the ip check more flexible and not depend on the name check
7364 raise errors.OpPrereqError("IP address check requires a name check",
7367 def BuildHooksEnv(self):
7370 This runs on master, primary and secondary nodes of the instance.
7373 env = _BuildInstanceHookEnvByObject(self, self.instance)
7374 env["INSTANCE_NEW_NAME"] = self.op.new_name
7377 def BuildHooksNodes(self):
7378 """Build hooks nodes.
7381 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7384 def CheckPrereq(self):
7385 """Check prerequisites.
7387 This checks that the instance is in the cluster and is not running.
7390 self.op.instance_name = _ExpandInstanceName(self.cfg,
7391 self.op.instance_name)
7392 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7393 assert instance is not None
7394 _CheckNodeOnline(self, instance.primary_node)
7395 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7396 msg="cannot rename")
7397 self.instance = instance
7399 new_name = self.op.new_name
7400 if self.op.name_check:
7401 hostname = netutils.GetHostname(name=new_name)
7402 if hostname.name != new_name:
7403 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7405 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7406 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7407 " same as given hostname '%s'") %
7408 (hostname.name, self.op.new_name),
7410 new_name = self.op.new_name = hostname.name
7411 if (self.op.ip_check and
7412 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7413 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7414 (hostname.ip, new_name),
7415 errors.ECODE_NOTUNIQUE)
7417 instance_list = self.cfg.GetInstanceList()
7418 if new_name in instance_list and new_name != instance.name:
7419 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7420 new_name, errors.ECODE_EXISTS)
7422 def Exec(self, feedback_fn):
7423 """Rename the instance.
7426 inst = self.instance
7427 old_name = inst.name
7429 rename_file_storage = False
7430 if (inst.disk_template in constants.DTS_FILEBASED and
7431 self.op.new_name != inst.name):
7432 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7433 rename_file_storage = True
7435 self.cfg.RenameInstance(inst.name, self.op.new_name)
7436 # Change the instance lock. This is definitely safe while we hold the BGL.
7437 # Otherwise the new lock would have to be added in acquired mode.
7439 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7440 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7442 # re-read the instance from the configuration after rename
7443 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7445 if rename_file_storage:
7446 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7447 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7448 old_file_storage_dir,
7449 new_file_storage_dir)
7450 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7451 " (but the instance has been renamed in Ganeti)" %
7452 (inst.primary_node, old_file_storage_dir,
7453 new_file_storage_dir))
7455 _StartInstanceDisks(self, inst, None)
7457 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7458 old_name, self.op.debug_level)
7459 msg = result.fail_msg
7461 msg = ("Could not run OS rename script for instance %s on node %s"
7462 " (but the instance has been renamed in Ganeti): %s" %
7463 (inst.name, inst.primary_node, msg))
7464 self.proc.LogWarning(msg)
7466 _ShutdownInstanceDisks(self, inst)
7471 class LUInstanceRemove(LogicalUnit):
7472 """Remove an instance.
7475 HPATH = "instance-remove"
7476 HTYPE = constants.HTYPE_INSTANCE
7479 def ExpandNames(self):
7480 self._ExpandAndLockInstance()
7481 self.needed_locks[locking.LEVEL_NODE] = []
7482 self.needed_locks[locking.LEVEL_NODE_RES] = []
7483 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7485 def DeclareLocks(self, level):
7486 if level == locking.LEVEL_NODE:
7487 self._LockInstancesNodes()
7488 elif level == locking.LEVEL_NODE_RES:
7490 self.needed_locks[locking.LEVEL_NODE_RES] = \
7491 self.needed_locks[locking.LEVEL_NODE][:]
7493 def BuildHooksEnv(self):
7496 This runs on master, primary and secondary nodes of the instance.
7499 env = _BuildInstanceHookEnvByObject(self, self.instance)
7500 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7503 def BuildHooksNodes(self):
7504 """Build hooks nodes.
7507 nl = [self.cfg.GetMasterNode()]
7508 nl_post = list(self.instance.all_nodes) + nl
7509 return (nl, nl_post)
7511 def CheckPrereq(self):
7512 """Check prerequisites.
7514 This checks that the instance is in the cluster.
7517 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7518 assert self.instance is not None, \
7519 "Cannot retrieve locked instance %s" % self.op.instance_name
7521 def Exec(self, feedback_fn):
7522 """Remove the instance.
7525 instance = self.instance
7526 logging.info("Shutting down instance %s on node %s",
7527 instance.name, instance.primary_node)
7529 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7530 self.op.shutdown_timeout)
7531 msg = result.fail_msg
7533 if self.op.ignore_failures:
7534 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7536 raise errors.OpExecError("Could not shutdown instance %s on"
7538 (instance.name, instance.primary_node, msg))
7540 assert (self.owned_locks(locking.LEVEL_NODE) ==
7541 self.owned_locks(locking.LEVEL_NODE_RES))
7542 assert not (set(instance.all_nodes) -
7543 self.owned_locks(locking.LEVEL_NODE)), \
7544 "Not owning correct locks"
7546 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7549 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7550 """Utility function to remove an instance.
7553 logging.info("Removing block devices for instance %s", instance.name)
7555 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7556 if not ignore_failures:
7557 raise errors.OpExecError("Can't remove instance's disks")
7558 feedback_fn("Warning: can't remove instance's disks")
7560 logging.info("Removing instance %s out of cluster config", instance.name)
7562 lu.cfg.RemoveInstance(instance.name)
7564 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7565 "Instance lock removal conflict"
7567 # Remove lock for the instance
7568 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7571 class LUInstanceQuery(NoHooksLU):
7572 """Logical unit for querying instances.
7575 # pylint: disable=W0142
7578 def CheckArguments(self):
7579 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7580 self.op.output_fields, self.op.use_locking)
7582 def ExpandNames(self):
7583 self.iq.ExpandNames(self)
7585 def DeclareLocks(self, level):
7586 self.iq.DeclareLocks(self, level)
7588 def Exec(self, feedback_fn):
7589 return self.iq.OldStyleQuery(self)
7592 class LUInstanceFailover(LogicalUnit):
7593 """Failover an instance.
7596 HPATH = "instance-failover"
7597 HTYPE = constants.HTYPE_INSTANCE
7600 def CheckArguments(self):
7601 """Check the arguments.
7604 self.iallocator = getattr(self.op, "iallocator", None)
7605 self.target_node = getattr(self.op, "target_node", None)
7607 def ExpandNames(self):
7608 self._ExpandAndLockInstance()
7610 if self.op.target_node is not None:
7611 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7613 self.needed_locks[locking.LEVEL_NODE] = []
7614 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7616 self.needed_locks[locking.LEVEL_NODE_RES] = []
7617 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7619 ignore_consistency = self.op.ignore_consistency
7620 shutdown_timeout = self.op.shutdown_timeout
7621 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7624 ignore_consistency=ignore_consistency,
7625 shutdown_timeout=shutdown_timeout,
7626 ignore_ipolicy=self.op.ignore_ipolicy)
7627 self.tasklets = [self._migrater]
7629 def DeclareLocks(self, level):
7630 if level == locking.LEVEL_NODE:
7631 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7632 if instance.disk_template in constants.DTS_EXT_MIRROR:
7633 if self.op.target_node is None:
7634 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7636 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7637 self.op.target_node]
7638 del self.recalculate_locks[locking.LEVEL_NODE]
7640 self._LockInstancesNodes()
7641 elif level == locking.LEVEL_NODE_RES:
7643 self.needed_locks[locking.LEVEL_NODE_RES] = \
7644 self.needed_locks[locking.LEVEL_NODE][:]
7646 def BuildHooksEnv(self):
7649 This runs on master, primary and secondary nodes of the instance.
7652 instance = self._migrater.instance
7653 source_node = instance.primary_node
7654 target_node = self.op.target_node
7656 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7657 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7658 "OLD_PRIMARY": source_node,
7659 "NEW_PRIMARY": target_node,
7662 if instance.disk_template in constants.DTS_INT_MIRROR:
7663 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7664 env["NEW_SECONDARY"] = source_node
7666 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7668 env.update(_BuildInstanceHookEnvByObject(self, instance))
7672 def BuildHooksNodes(self):
7673 """Build hooks nodes.
7676 instance = self._migrater.instance
7677 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7678 return (nl, nl + [instance.primary_node])
7681 class LUInstanceMigrate(LogicalUnit):
7682 """Migrate an instance.
7684 This is migration without shutting down, compared to the failover,
7685 which is done with shutdown.
7688 HPATH = "instance-migrate"
7689 HTYPE = constants.HTYPE_INSTANCE
7692 def ExpandNames(self):
7693 self._ExpandAndLockInstance()
7695 if self.op.target_node is not None:
7696 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7698 self.needed_locks[locking.LEVEL_NODE] = []
7699 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7701 self.needed_locks[locking.LEVEL_NODE_RES] = []
7702 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7705 TLMigrateInstance(self, self.op.instance_name,
7706 cleanup=self.op.cleanup,
7708 fallback=self.op.allow_failover,
7709 allow_runtime_changes=self.op.allow_runtime_changes,
7710 ignore_ipolicy=self.op.ignore_ipolicy)
7711 self.tasklets = [self._migrater]
7713 def DeclareLocks(self, level):
7714 if level == locking.LEVEL_NODE:
7715 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7716 if instance.disk_template in constants.DTS_EXT_MIRROR:
7717 if self.op.target_node is None:
7718 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7720 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7721 self.op.target_node]
7722 del self.recalculate_locks[locking.LEVEL_NODE]
7724 self._LockInstancesNodes()
7725 elif level == locking.LEVEL_NODE_RES:
7727 self.needed_locks[locking.LEVEL_NODE_RES] = \
7728 self.needed_locks[locking.LEVEL_NODE][:]
7730 def BuildHooksEnv(self):
7733 This runs on master, primary and secondary nodes of the instance.
7736 instance = self._migrater.instance
7737 source_node = instance.primary_node
7738 target_node = self.op.target_node
7739 env = _BuildInstanceHookEnvByObject(self, instance)
7741 "MIGRATE_LIVE": self._migrater.live,
7742 "MIGRATE_CLEANUP": self.op.cleanup,
7743 "OLD_PRIMARY": source_node,
7744 "NEW_PRIMARY": target_node,
7745 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7748 if instance.disk_template in constants.DTS_INT_MIRROR:
7749 env["OLD_SECONDARY"] = target_node
7750 env["NEW_SECONDARY"] = source_node
7752 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7756 def BuildHooksNodes(self):
7757 """Build hooks nodes.
7760 instance = self._migrater.instance
7761 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7762 return (nl, nl + [instance.primary_node])
7765 class LUInstanceMove(LogicalUnit):
7766 """Move an instance by data-copying.
7769 HPATH = "instance-move"
7770 HTYPE = constants.HTYPE_INSTANCE
7773 def ExpandNames(self):
7774 self._ExpandAndLockInstance()
7775 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7776 self.op.target_node = target_node
7777 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7778 self.needed_locks[locking.LEVEL_NODE_RES] = []
7779 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7781 def DeclareLocks(self, level):
7782 if level == locking.LEVEL_NODE:
7783 self._LockInstancesNodes(primary_only=True)
7784 elif level == locking.LEVEL_NODE_RES:
7786 self.needed_locks[locking.LEVEL_NODE_RES] = \
7787 self.needed_locks[locking.LEVEL_NODE][:]
7789 def BuildHooksEnv(self):
7792 This runs on master, primary and secondary nodes of the instance.
7796 "TARGET_NODE": self.op.target_node,
7797 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7799 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7802 def BuildHooksNodes(self):
7803 """Build hooks nodes.
7807 self.cfg.GetMasterNode(),
7808 self.instance.primary_node,
7809 self.op.target_node,
7813 def CheckPrereq(self):
7814 """Check prerequisites.
7816 This checks that the instance is in the cluster.
7819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7820 assert self.instance is not None, \
7821 "Cannot retrieve locked instance %s" % self.op.instance_name
7823 node = self.cfg.GetNodeInfo(self.op.target_node)
7824 assert node is not None, \
7825 "Cannot retrieve locked node %s" % self.op.target_node
7827 self.target_node = target_node = node.name
7829 if target_node == instance.primary_node:
7830 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7831 (instance.name, target_node),
7834 bep = self.cfg.GetClusterInfo().FillBE(instance)
7836 for idx, dsk in enumerate(instance.disks):
7837 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7838 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7839 " cannot copy" % idx, errors.ECODE_STATE)
7841 _CheckNodeOnline(self, target_node)
7842 _CheckNodeNotDrained(self, target_node)
7843 _CheckNodeVmCapable(self, target_node)
7844 cluster = self.cfg.GetClusterInfo()
7845 group_info = self.cfg.GetNodeGroup(node.group)
7846 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7847 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7848 ignore=self.op.ignore_ipolicy)
7850 if instance.admin_state == constants.ADMINST_UP:
7851 # check memory requirements on the target node
7852 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7853 instance.name, bep[constants.BE_MAXMEM],
7854 instance.hypervisor)
7856 self.LogInfo("Not checking memory on the secondary node as"
7857 " instance will not be started")
7859 # check bridge existence
7860 _CheckInstanceBridgesExist(self, instance, node=target_node)
7862 def Exec(self, feedback_fn):
7863 """Move an instance.
7865 The move is done by shutting it down on its present node, copying
7866 the data over (slow) and starting it on the new node.
7869 instance = self.instance
7871 source_node = instance.primary_node
7872 target_node = self.target_node
7874 self.LogInfo("Shutting down instance %s on source node %s",
7875 instance.name, source_node)
7877 assert (self.owned_locks(locking.LEVEL_NODE) ==
7878 self.owned_locks(locking.LEVEL_NODE_RES))
7880 result = self.rpc.call_instance_shutdown(source_node, instance,
7881 self.op.shutdown_timeout)
7882 msg = result.fail_msg
7884 if self.op.ignore_consistency:
7885 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7886 " Proceeding anyway. Please make sure node"
7887 " %s is down. Error details: %s",
7888 instance.name, source_node, source_node, msg)
7890 raise errors.OpExecError("Could not shutdown instance %s on"
7892 (instance.name, source_node, msg))
7894 # create the target disks
7896 _CreateDisks(self, instance, target_node=target_node)
7897 except errors.OpExecError:
7898 self.LogWarning("Device creation failed, reverting...")
7900 _RemoveDisks(self, instance, target_node=target_node)
7902 self.cfg.ReleaseDRBDMinors(instance.name)
7905 cluster_name = self.cfg.GetClusterInfo().cluster_name
7908 # activate, get path, copy the data over
7909 for idx, disk in enumerate(instance.disks):
7910 self.LogInfo("Copying data for disk %d", idx)
7911 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7912 instance.name, True, idx)
7914 self.LogWarning("Can't assemble newly created disk %d: %s",
7915 idx, result.fail_msg)
7916 errs.append(result.fail_msg)
7918 dev_path = result.payload
7919 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7920 target_node, dev_path,
7923 self.LogWarning("Can't copy data over for disk %d: %s",
7924 idx, result.fail_msg)
7925 errs.append(result.fail_msg)
7929 self.LogWarning("Some disks failed to copy, aborting")
7931 _RemoveDisks(self, instance, target_node=target_node)
7933 self.cfg.ReleaseDRBDMinors(instance.name)
7934 raise errors.OpExecError("Errors during disk copy: %s" %
7937 instance.primary_node = target_node
7938 self.cfg.Update(instance, feedback_fn)
7940 self.LogInfo("Removing the disks on the original node")
7941 _RemoveDisks(self, instance, target_node=source_node)
7943 # Only start the instance if it's marked as up
7944 if instance.admin_state == constants.ADMINST_UP:
7945 self.LogInfo("Starting instance %s on node %s",
7946 instance.name, target_node)
7948 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7949 ignore_secondaries=True)
7951 _ShutdownInstanceDisks(self, instance)
7952 raise errors.OpExecError("Can't activate the instance's disks")
7954 result = self.rpc.call_instance_start(target_node,
7955 (instance, None, None), False)
7956 msg = result.fail_msg
7958 _ShutdownInstanceDisks(self, instance)
7959 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7960 (instance.name, target_node, msg))
7963 class LUNodeMigrate(LogicalUnit):
7964 """Migrate all instances from a node.
7967 HPATH = "node-migrate"
7968 HTYPE = constants.HTYPE_NODE
7971 def CheckArguments(self):
7974 def ExpandNames(self):
7975 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7977 self.share_locks = _ShareAll()
7978 self.needed_locks = {
7979 locking.LEVEL_NODE: [self.op.node_name],
7982 def BuildHooksEnv(self):
7985 This runs on the master, the primary and all the secondaries.
7989 "NODE_NAME": self.op.node_name,
7990 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7993 def BuildHooksNodes(self):
7994 """Build hooks nodes.
7997 nl = [self.cfg.GetMasterNode()]
8000 def CheckPrereq(self):
8003 def Exec(self, feedback_fn):
8004 # Prepare jobs for migration instances
8005 allow_runtime_changes = self.op.allow_runtime_changes
8007 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8010 iallocator=self.op.iallocator,
8011 target_node=self.op.target_node,
8012 allow_runtime_changes=allow_runtime_changes,
8013 ignore_ipolicy=self.op.ignore_ipolicy)]
8014 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8017 # TODO: Run iallocator in this opcode and pass correct placement options to
8018 # OpInstanceMigrate. Since other jobs can modify the cluster between
8019 # running the iallocator and the actual migration, a good consistency model
8020 # will have to be found.
8022 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8023 frozenset([self.op.node_name]))
8025 return ResultWithJobs(jobs)
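# Illustrative sketch (instance names are invented): the list comprehension
# above yields one single-opcode job per primary instance on the node, i.e.
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
#
# which is then wrapped in ResultWithJobs and returned from Exec.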
8028 class TLMigrateInstance(Tasklet):
8029 """Tasklet class for instance migration.
8032 @ivar live: whether the migration will be done live or non-live;
8033 this variable is initialized only after CheckPrereq has run
8034 @type cleanup: boolean
8035 @ivar cleanup: Whether we are cleaning up after a failed migration
8036 @type iallocator: string
8037 @ivar iallocator: The iallocator used to determine target_node
8038 @type target_node: string
8039 @ivar target_node: If given, the target_node to reallocate the instance to
8040 @type failover: boolean
8041 @ivar failover: Whether operation results in failover or migration
8042 @type fallback: boolean
8043 @ivar fallback: Whether fallback to failover is allowed if migration not
8045 @type ignore_consistency: boolean
8046 @ivar ignore_consistency: Whether we should ignore consistency between source
8048 @type shutdown_timeout: int
8049 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8050 @type ignore_ipolicy: bool
8051 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8056 _MIGRATION_POLL_INTERVAL = 1 # seconds
8057 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8059 def __init__(self, lu, instance_name, cleanup=False,
8060 failover=False, fallback=False,
8061 ignore_consistency=False,
8062 allow_runtime_changes=True,
8063 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8064 ignore_ipolicy=False):
8065 """Initializes this class.
8068 Tasklet.__init__(self, lu)
8071 self.instance_name = instance_name
8072 self.cleanup = cleanup
8073 self.live = False # will be overridden later
8074 self.failover = failover
8075 self.fallback = fallback
8076 self.ignore_consistency = ignore_consistency
8077 self.shutdown_timeout = shutdown_timeout
8078 self.ignore_ipolicy = ignore_ipolicy
8079 self.allow_runtime_changes = allow_runtime_changes
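# Illustrative sketch (argument values are examples only): the logical units
# above create this tasklet in their ExpandNames and let the LU machinery
# drive it, e.g.
#
#   tl = TLMigrateInstance(lu, "inst1.example.com",
#                          cleanup=False,    # not recovering a failed run
#                          failover=False,   # regular migration
#                          fallback=True)    # may fall back to failover
#   lu.tasklets = [tl]
#
# In Exec() below, failover=True routes to _ExecFailover, cleanup=True to
# _ExecCleanup, and the default path to _ExecMigration.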
8081 def CheckPrereq(self):
8082 """Check prerequisites.
8084 This checks that the instance is in the cluster.
8087 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8088 instance = self.cfg.GetInstanceInfo(instance_name)
8089 assert instance is not None
8090 self.instance = instance
8091 cluster = self.cfg.GetClusterInfo()
8093 if (not self.cleanup and
8094 not instance.admin_state == constants.ADMINST_UP and
8095 not self.failover and self.fallback):
8096 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8097 " switching to failover")
8098 self.failover = True
8100 if instance.disk_template not in constants.DTS_MIRRORED:
8105 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8106 " %s" % (instance.disk_template, text),
8109 if instance.disk_template in constants.DTS_EXT_MIRROR:
8110 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8112 if self.lu.op.iallocator:
8113 self._RunAllocator()
8115 # We set self.target_node as it is required by
8117 self.target_node = self.lu.op.target_node
8119 # Check that the target node is correct in terms of instance policy
8120 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8121 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8122 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8124 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8125 ignore=self.ignore_ipolicy)
8127 # self.target_node is already populated, either directly or by the
8129 target_node = self.target_node
8130 if self.target_node == instance.primary_node:
8131 raise errors.OpPrereqError("Cannot migrate instance %s"
8132 " to its primary (%s)" %
8133 (instance.name, instance.primary_node),
8136 if len(self.lu.tasklets) == 1:
8137 # It is safe to release locks only when we're the only tasklet
8139 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8140 keep=[instance.primary_node, self.target_node])
8143 secondary_nodes = instance.secondary_nodes
8144 if not secondary_nodes:
8145 raise errors.ConfigurationError("No secondary node but using"
8146 " %s disk template" %
8147 instance.disk_template)
8148 target_node = secondary_nodes[0]
8149 if self.lu.op.iallocator or (self.lu.op.target_node and
8150 self.lu.op.target_node != target_node):
8152 text = "failed over"
8155 raise errors.OpPrereqError("Instances with disk template %s cannot"
8156 " be %s to arbitrary nodes"
8157 " (neither an iallocator nor a target"
8158 " node can be passed)" %
8159 (instance.disk_template, text),
8161 nodeinfo = self.cfg.GetNodeInfo(target_node)
8162 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8163 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8165 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8166 ignore=self.ignore_ipolicy)
8168 i_be = cluster.FillBE(instance)
8170 # check memory requirements on the target node
8171 if (not self.cleanup and
8172 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8173 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8174 "migrating instance %s" %
8176 i_be[constants.BE_MINMEM],
8177 instance.hypervisor)
8179 self.lu.LogInfo("Not checking memory on the secondary node as"
8180 " instance will not be started")
8182 # check if failover must be forced instead of migration
8183 if (not self.cleanup and not self.failover and
8184 i_be[constants.BE_ALWAYS_FAILOVER]):
8185 self.lu.LogInfo("Instance configured to always failover; fallback"
8187 self.failover = True
8189 # check bridge existence
8190 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8192 if not self.cleanup:
8193 _CheckNodeNotDrained(self.lu, target_node)
8194 if not self.failover:
8195 result = self.rpc.call_instance_migratable(instance.primary_node,
8197 if result.fail_msg and self.fallback:
8198 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8200 self.failover = True
8202 result.Raise("Can't migrate, please use failover",
8203 prereq=True, ecode=errors.ECODE_STATE)
8205 assert not (self.failover and self.cleanup)
8207 if not self.failover:
8208 if self.lu.op.live is not None and self.lu.op.mode is not None:
8209 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8210 " parameters are accepted",
8212 if self.lu.op.live is not None:
8214 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8216 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8217 # reset the 'live' parameter to None so that repeated
8218 # invocations of CheckPrereq do not raise an exception
8219 self.lu.op.live = None
8220 elif self.lu.op.mode is None:
8221 # read the default value from the hypervisor
8222 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8223 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8225 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8227 # Failover is never live
8230 if not (self.failover or self.cleanup):
8231 remote_info = self.rpc.call_instance_info(instance.primary_node,
8233 instance.hypervisor)
8234 remote_info.Raise("Error checking instance on node %s" %
8235 instance.primary_node)
8236 instance_running = bool(remote_info.payload)
8237 if instance_running:
8238 self.current_mem = int(remote_info.payload["memory"])
8240 def _RunAllocator(self):
8241 """Run the allocator based on input opcode.
8244 # FIXME: add a self.ignore_ipolicy option
8245 req = iallocator.IAReqRelocate(name=self.instance_name,
8246 relocate_from=[self.instance.primary_node])
8247 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8249 ial.Run(self.lu.op.iallocator)
8252 raise errors.OpPrereqError("Can't compute nodes using"
8253 " iallocator '%s': %s" %
8254 (self.lu.op.iallocator, ial.info),
8256 self.target_node = ial.result[0]
8257 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8258 self.instance_name, self.lu.op.iallocator,
8259 utils.CommaJoin(ial.result))
8261 def _WaitUntilSync(self):
8262 """Poll with custom rpc for disk sync.
8264 This uses our own step-based rpc call.
8267 self.feedback_fn("* wait until resync is done")
8271 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8273 (self.instance.disks,
8276 for node, nres in result.items():
8277 nres.Raise("Cannot resync disks on node %s" % node)
8278 node_done, node_percent = nres.payload
8279 all_done = all_done and node_done
8280 if node_percent is not None:
8281 min_percent = min(min_percent, node_percent)
8283 if min_percent < 100:
8284 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8287 def _EnsureSecondary(self, node):
8288 """Demote a node to secondary.
8291 self.feedback_fn("* switching node %s to secondary mode" % node)
8293 for dev in self.instance.disks:
8294 self.cfg.SetDiskID(dev, node)
8296 result = self.rpc.call_blockdev_close(node, self.instance.name,
8297 self.instance.disks)
8298 result.Raise("Cannot change disk to secondary on node %s" % node)
8300 def _GoStandalone(self):
8301 """Disconnect from the network.
8304 self.feedback_fn("* changing into standalone mode")
8305 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8306 self.instance.disks)
8307 for node, nres in result.items():
8308 nres.Raise("Cannot disconnect disks node %s" % node)
8310 def _GoReconnect(self, multimaster):
8311 """Reconnect to the network.
8317 msg = "single-master"
8318 self.feedback_fn("* changing disks into %s mode" % msg)
8319 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8320 (self.instance.disks, self.instance),
8321 self.instance.name, multimaster)
8322 for node, nres in result.items():
8323 nres.Raise("Cannot change disks config on node %s" % node)
8325 def _ExecCleanup(self):
8326 """Try to cleanup after a failed migration.
8328 The cleanup is done by:
8329 - check that the instance is running only on one node
8330 (and update the config if needed)
8331 - change disks on its secondary node to secondary
8332 - wait until disks are fully synchronized
8333 - disconnect from the network
8334 - change disks into single-master mode
8335 - wait again until disks are fully synchronized
8338 instance = self.instance
8339 target_node = self.target_node
8340 source_node = self.source_node
8342 # check running on only one node
8343 self.feedback_fn("* checking where the instance actually runs"
8344 " (if this hangs, the hypervisor might be in"
8346 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8347 for node, result in ins_l.items():
8348 result.Raise("Can't contact node %s" % node)
8350 runningon_source = instance.name in ins_l[source_node].payload
8351 runningon_target = instance.name in ins_l[target_node].payload
8353 if runningon_source and runningon_target:
8354 raise errors.OpExecError("Instance seems to be running on two nodes,"
8355 " or the hypervisor is confused; you will have"
8356 " to ensure manually that it runs only on one"
8357 " and restart this operation")
8359 if not (runningon_source or runningon_target):
8360 raise errors.OpExecError("Instance does not seem to be running at all;"
8361 " in this case it's safer to repair by"
8362 " running 'gnt-instance stop' to ensure disk"
8363 " shutdown, and then restarting it")
8365 if runningon_target:
8366 # the migration has actually succeeded, we need to update the config
8367 self.feedback_fn("* instance running on secondary node (%s),"
8368 " updating config" % target_node)
8369 instance.primary_node = target_node
8370 self.cfg.Update(instance, self.feedback_fn)
8371 demoted_node = source_node
8373 self.feedback_fn("* instance confirmed to be running on its"
8374 " primary node (%s)" % source_node)
8375 demoted_node = target_node
8377 if instance.disk_template in constants.DTS_INT_MIRROR:
8378 self._EnsureSecondary(demoted_node)
8380 self._WaitUntilSync()
8381 except errors.OpExecError:
8382 # we ignore errors here, since if the device is standalone, it
8383 # won't be able to sync
8385 self._GoStandalone()
8386 self._GoReconnect(False)
8387 self._WaitUntilSync()
8389 self.feedback_fn("* done")
8391 def _RevertDiskStatus(self):
8392 """Try to revert the disk status after a failed migration.
8395 target_node = self.target_node
8396 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8400 self._EnsureSecondary(target_node)
8401 self._GoStandalone()
8402 self._GoReconnect(False)
8403 self._WaitUntilSync()
8404 except errors.OpExecError, err:
8405 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8406 " please try to recover the instance manually;"
8407 " error '%s'" % str(err))
8409 def _AbortMigration(self):
8410 """Call the hypervisor code to abort a started migration.
8413 instance = self.instance
8414 target_node = self.target_node
8415 source_node = self.source_node
8416 migration_info = self.migration_info
8418 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8422 abort_msg = abort_result.fail_msg
8424 logging.error("Aborting migration failed on target node %s: %s",
8425 target_node, abort_msg)
8426 # Don't raise an exception here, as we still have to try to revert the
8427 # disk status, even if this step failed.
8429 abort_result = self.rpc.call_instance_finalize_migration_src(
8430 source_node, instance, False, self.live)
8431 abort_msg = abort_result.fail_msg
8433 logging.error("Aborting migration failed on source node %s: %s",
8434 source_node, abort_msg)
8436 def _ExecMigration(self):
8437 """Migrate an instance.
8439 The migrate is done by:
8440 - change the disks into dual-master mode
8441 - wait until disks are fully synchronized again
8442 - migrate the instance
8443 - change disks on the new secondary node (the old primary) to secondary
8444 - wait until disks are fully synchronized
8445 - change disks into single-master mode
8448 instance = self.instance
8449 target_node = self.target_node
8450 source_node = self.source_node
8452 # Check for hypervisor version mismatch and warn the user.
8453 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8454 None, [self.instance.hypervisor])
8455 for ninfo in nodeinfo.values():
8456 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8458 (_, _, (src_info, )) = nodeinfo[source_node].payload
8459 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8461 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8462 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8463 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8464 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8465 if src_version != dst_version:
8466 self.feedback_fn("* warning: hypervisor version mismatch between"
8467 " source (%s) and target (%s) node" %
8468 (src_version, dst_version))
8470 self.feedback_fn("* checking disk consistency between source and target")
8471 for (idx, dev) in enumerate(instance.disks):
8472 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8473 raise errors.OpExecError("Disk %s is degraded or not fully"
8474 " synchronized on target node,"
8475 " aborting migration" % idx)
8477 if self.current_mem > self.tgt_free_mem:
8478 if not self.allow_runtime_changes:
8479 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8480 " free memory to fit instance %s on target"
8481 " node %s (have %dMB, need %dMB)" %
8482 (instance.name, target_node,
8483 self.tgt_free_mem, self.current_mem))
8484 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8485 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8488 rpcres.Raise("Cannot modify instance runtime memory")
8490 # First get the migration information from the remote node
8491 result = self.rpc.call_migration_info(source_node, instance)
8492 msg = result.fail_msg
8494 log_err = ("Failed fetching source migration information from %s: %s" %
8496 logging.error(log_err)
8497 raise errors.OpExecError(log_err)
8499 self.migration_info = migration_info = result.payload
8501 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8502 # Then switch the disks to master/master mode
8503 self._EnsureSecondary(target_node)
8504 self._GoStandalone()
8505 self._GoReconnect(True)
8506 self._WaitUntilSync()
8508 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8509 result = self.rpc.call_accept_instance(target_node,
8512 self.nodes_ip[target_node])
8514 msg = result.fail_msg
8516 logging.error("Instance pre-migration failed, trying to revert"
8517 " disk status: %s", msg)
8518 self.feedback_fn("Pre-migration failed, aborting")
8519 self._AbortMigration()
8520 self._RevertDiskStatus()
8521 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8522 (instance.name, msg))
8524 self.feedback_fn("* migrating instance to %s" % target_node)
8525 result = self.rpc.call_instance_migrate(source_node, instance,
8526 self.nodes_ip[target_node],
8528 msg = result.fail_msg
8530 logging.error("Instance migration failed, trying to revert"
8531 " disk status: %s", msg)
8532 self.feedback_fn("Migration failed, aborting")
8533 self._AbortMigration()
8534 self._RevertDiskStatus()
8535 raise errors.OpExecError("Could not migrate instance %s: %s" %
8536 (instance.name, msg))
8538 self.feedback_fn("* starting memory transfer")
8539 last_feedback = time.time()
8541 result = self.rpc.call_instance_get_migration_status(source_node,
8543 msg = result.fail_msg
8544 ms = result.payload # MigrationStatus instance
8545 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8546 logging.error("Instance migration failed, trying to revert"
8547 " disk status: %s", msg)
8548 self.feedback_fn("Migration failed, aborting")
8549 self._AbortMigration()
8550 self._RevertDiskStatus()
8551 raise errors.OpExecError("Could not migrate instance %s: %s" %
8552 (instance.name, msg))
8554 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8555 self.feedback_fn("* memory transfer complete")
8558 if (utils.TimeoutExpired(last_feedback,
8559 self._MIGRATION_FEEDBACK_INTERVAL) and
8560 ms.transferred_ram is not None):
8561 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8562 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8563 last_feedback = time.time()
8565 time.sleep(self._MIGRATION_POLL_INTERVAL)
8567 result = self.rpc.call_instance_finalize_migration_src(source_node,
8571 msg = result.fail_msg
8573 logging.error("Instance migration succeeded, but finalization failed"
8574 " on the source node: %s", msg)
8575 raise errors.OpExecError("Could not finalize instance migration: %s" %
8578 instance.primary_node = target_node
8580 # distribute new instance config to the other nodes
8581 self.cfg.Update(instance, self.feedback_fn)
8583 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8587 msg = result.fail_msg
8589 logging.error("Instance migration succeeded, but finalization failed"
8590 " on the target node: %s", msg)
8591 raise errors.OpExecError("Could not finalize instance migration: %s" %
8594 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8595 self._EnsureSecondary(source_node)
8596 self._WaitUntilSync()
8597 self._GoStandalone()
8598 self._GoReconnect(False)
8599 self._WaitUntilSync()
8601 # If the instance's disk template is `rbd' and there was a successful
8602 # migration, unmap the device from the source node.
8603 if self.instance.disk_template == constants.DT_RBD:
8604 disks = _ExpandCheckDisks(instance, instance.disks)
8605 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8607 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8608 msg = result.fail_msg
8610 logging.error("Migration was successful, but couldn't unmap the"
8611 " block device %s on source node %s: %s",
8612 disk.iv_name, source_node, msg)
8613 logging.error("You need to unmap the device %s manually on %s",
8614 disk.iv_name, source_node)
8616 self.feedback_fn("* done")
8618 def _ExecFailover(self):
8619 """Failover an instance.
8621 The failover is done by shutting it down on its present node and
8622 starting it on the secondary.
8625 instance = self.instance
8626 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8628 source_node = instance.primary_node
8629 target_node = self.target_node
8631 if instance.admin_state == constants.ADMINST_UP:
8632 self.feedback_fn("* checking disk consistency between source and target")
8633 for (idx, dev) in enumerate(instance.disks):
8634 # for drbd, these are drbd over lvm
8635 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8637 if primary_node.offline:
8638 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8640 (primary_node.name, idx, target_node))
8641 elif not self.ignore_consistency:
8642 raise errors.OpExecError("Disk %s is degraded on target node,"
8643 " aborting failover" % idx)
8645 self.feedback_fn("* not checking disk consistency as instance is not"
8648 self.feedback_fn("* shutting down instance on source node")
8649 logging.info("Shutting down instance %s on node %s",
8650 instance.name, source_node)
8652 result = self.rpc.call_instance_shutdown(source_node, instance,
8653 self.shutdown_timeout)
8654 msg = result.fail_msg
8656 if self.ignore_consistency or primary_node.offline:
8657 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8658 " proceeding anyway; please make sure node"
8659 " %s is down; error details: %s",
8660 instance.name, source_node, source_node, msg)
8662 raise errors.OpExecError("Could not shutdown instance %s on"
8664 (instance.name, source_node, msg))
8666 self.feedback_fn("* deactivating the instance's disks on source node")
8667 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8668 raise errors.OpExecError("Can't shut down the instance's disks")
8670 instance.primary_node = target_node
8671 # distribute new instance config to the other nodes
8672 self.cfg.Update(instance, self.feedback_fn)
8674 # Only start the instance if it's marked as up
8675 if instance.admin_state == constants.ADMINST_UP:
8676 self.feedback_fn("* activating the instance's disks on target node %s" %
8678 logging.info("Starting instance %s on node %s",
8679 instance.name, target_node)
8681 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8682 ignore_secondaries=True)
8684 _ShutdownInstanceDisks(self.lu, instance)
8685 raise errors.OpExecError("Can't activate the instance's disks")
8687 self.feedback_fn("* starting the instance on the target node %s" %
8689 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8691 msg = result.fail_msg
8693 _ShutdownInstanceDisks(self.lu, instance)
8694 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8695 (instance.name, target_node, msg))
8697 def Exec(self, feedback_fn):
8698 """Perform the migration.
8701 self.feedback_fn = feedback_fn
8702 self.source_node = self.instance.primary_node
8704 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8705 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8706 self.target_node = self.instance.secondary_nodes[0]
8707 # Otherwise self.target_node has been populated either
8708 # directly, or through an iallocator.
8710 self.all_nodes = [self.source_node, self.target_node]
8711 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8712 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8715 feedback_fn("Failover instance %s" % self.instance.name)
8716 self._ExecFailover()
8718 feedback_fn("Migrating instance %s" % self.instance.name)
8721 return self._ExecCleanup()
8723 return self._ExecMigration()
8726 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8728 """Wrapper around L{_CreateBlockDevInner}.
8730 This method annotates the root device first.
8733 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8734 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8738 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8740 """Create a tree of block devices on a given node.
8742 If this device type has to be created on secondaries, create it and
8745 If not, just recurse to children keeping the same 'force' value.
8747 @attention: The device has to be annotated already.
8749 @param lu: the lu on whose behalf we execute
8750 @param node: the node on which to create the device
8751 @type instance: L{objects.Instance}
8752 @param instance: the instance which owns the device
8753 @type device: L{objects.Disk}
8754 @param device: the device to create
8755 @type force_create: boolean
8756 @param force_create: whether to force creation of this device; this
8757 will be changed to True whenever we find a device which has
8758 the CreateOnSecondary() attribute
8759 @param info: the extra 'metadata' we should attach to the device
8760 (this will be represented as a LVM tag)
8761 @type force_open: boolean
8762 @param force_open: this parameter will be passed to the
8763 L{backend.BlockdevCreate} function where it specifies
8764 whether we run on primary or not, and it affects both
8765 the child assembly and the device's own Open() execution
8768 if device.CreateOnSecondary():
8772 for child in device.children:
8773 _CreateBlockDevInner(lu, node, instance, child, force_create,
8776 if not force_create:
8779 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
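# Illustrative sketch (sizes are examples): for a DRBD8 disk the recursion
# above descends into the two LVM children (data and metadata) first and only
# then creates the DRBD device itself; on a secondary node force_create stays
# False until a device whose CreateOnSecondary() returns True is reached.
#
#   disk/0 (LD_DRBD8, 1024 MB)
#     +-- LD_LV <vg>/<name>_data (1024 MB)
#     +-- LD_LV <vg>/<name>_meta (DRBD_META_SIZE MB)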
8782 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8783 """Create a single block device on a given node.
8785 This will not recurse over children of the device, so they must be
8788 @param lu: the lu on whose behalf we execute
8789 @param node: the node on which to create the device
8790 @type instance: L{objects.Instance}
8791 @param instance: the instance which owns the device
8792 @type device: L{objects.Disk}
8793 @param device: the device to create
8794 @param info: the extra 'metadata' we should attach to the device
8795 (this will be represented as a LVM tag)
8796 @type force_open: boolean
8797 @param force_open: this parameter will be passed to the
8798 L{backend.BlockdevCreate} function where it specifies
8799 whether we run on primary or not, and it affects both
8800 the child assembly and the device's own Open() execution
8803 lu.cfg.SetDiskID(device, node)
8804 result = lu.rpc.call_blockdev_create(node, device, device.size,
8805 instance.name, force_open, info)
8806 result.Raise("Can't create block device %s on"
8807 " node %s for instance %s" % (device, node, instance.name))
8808 if device.physical_id is None:
8809 device.physical_id = result.payload
8812 def _GenerateUniqueNames(lu, exts):
8813 """Generate a suitable LV name.
8815 This will generate a logical volume name for the given instance.
8820 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8821 results.append("%s%s" % (new_id, val))
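# Illustrative example (IDs are invented): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) produces one cluster-unique ID per extension, roughly
#   ["<unique-id-1>.disk0", "<unique-id-2>.disk1"]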
8825 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8826 iv_name, p_minor, s_minor):
8827 """Generate a drbd8 device complete with its children.
8830 assert len(vgnames) == len(names) == 2
8831 port = lu.cfg.AllocatePort()
8832 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8834 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8835 logical_id=(vgnames[0], names[0]),
8837 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8838 size=constants.DRBD_META_SIZE,
8839 logical_id=(vgnames[1], names[1]),
8841 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8842 logical_id=(primary, secondary, port,
8845 children=[dev_data, dev_meta],
8846 iv_name=iv_name, params={})
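# Illustrative sketch of the tree built above (sizes are examples): a 1024 MB
# DRBD8 disk ends up as
#
#   objects.Disk(dev_type=LD_DRBD8, size=1024,
#                logical_id=(primary, secondary, port, ...),
#                iv_name="disk/0",
#                children=[
#                  objects.Disk(dev_type=LD_LV, size=1024,
#                               logical_id=(vgnames[0], names[0])),
#                  objects.Disk(dev_type=LD_LV, size=DRBD_META_SIZE,
#                               logical_id=(vgnames[1], names[1])),
#                ])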
8850 _DISK_TEMPLATE_NAME_PREFIX = {
8851 constants.DT_PLAIN: "",
8852 constants.DT_RBD: ".rbd",
8856 _DISK_TEMPLATE_DEVICE_TYPE = {
8857 constants.DT_PLAIN: constants.LD_LV,
8858 constants.DT_FILE: constants.LD_FILE,
8859 constants.DT_SHARED_FILE: constants.LD_FILE,
8860 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8861 constants.DT_RBD: constants.LD_RBD,
8865 def _GenerateDiskTemplate(
8866 lu, template_name, instance_name, primary_node, secondary_nodes,
8867 disk_info, file_storage_dir, file_driver, base_index,
8868 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8869 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8870 """Generate the entire disk layout for a given template type.
8873 #TODO: compute space requirements
8875 vgname = lu.cfg.GetVGName()
8876 disk_count = len(disk_info)
8879 if template_name == constants.DT_DISKLESS:
8881 elif template_name == constants.DT_DRBD8:
8882 if len(secondary_nodes) != 1:
8883 raise errors.ProgrammerError("Wrong template configuration")
8884 remote_node = secondary_nodes[0]
8885 minors = lu.cfg.AllocateDRBDMinor(
8886 [primary_node, remote_node] * len(disk_info), instance_name)
8888 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8890 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8893 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8894 for i in range(disk_count)]):
8895 names.append(lv_prefix + "_data")
8896 names.append(lv_prefix + "_meta")
8897 for idx, disk in enumerate(disk_info):
8898 disk_index = idx + base_index
8899 data_vg = disk.get(constants.IDISK_VG, vgname)
8900 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8901 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8902 disk[constants.IDISK_SIZE],
8904 names[idx * 2:idx * 2 + 2],
8905 "disk/%d" % disk_index,
8906 minors[idx * 2], minors[idx * 2 + 1])
8907 disk_dev.mode = disk[constants.IDISK_MODE]
8908 disks.append(disk_dev)
8911 raise errors.ProgrammerError("Wrong template configuration")
8913 if template_name == constants.DT_FILE:
8915 elif template_name == constants.DT_SHARED_FILE:
8916 _req_shr_file_storage()
8918 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8919 if name_prefix is None:
8922 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8923 (name_prefix, base_index + i)
8924 for i in range(disk_count)])
8926 if template_name == constants.DT_PLAIN:
8927 def logical_id_fn(idx, _, disk):
8928 vg = disk.get(constants.IDISK_VG, vgname)
8929 return (vg, names[idx])
8930 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8932 lambda _, disk_index, disk: (file_driver,
8933 "%s/disk%d" % (file_storage_dir,
8935 elif template_name == constants.DT_BLOCK:
8937 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8938 disk[constants.IDISK_ADOPT])
8939 elif template_name == constants.DT_RBD:
8940 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8942 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8944 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8946 for idx, disk in enumerate(disk_info):
8947 disk_index = idx + base_index
8948 size = disk[constants.IDISK_SIZE]
8949 feedback_fn("* disk %s, size %s" %
8950 (disk_index, utils.FormatUnit(size, "h")))
8951 disks.append(objects.Disk(dev_type=dev_type, size=size,
8952 logical_id=logical_id_fn(idx, disk_index, disk),
8953 iv_name="disk/%d" % disk_index,
8954 mode=disk[constants.IDISK_MODE],
8960 def _GetInstanceInfoText(instance):
8961 """Compute that text that should be added to the disk's metadata.
8964 return "originstname+%s" % instance.name
8967 def _CalcEta(time_taken, written, total_size):
8968 """Calculates the ETA based on size written and total size.
8970 @param time_taken: The time taken so far
8971 @param written: amount written so far
8972 @param total_size: The total size of data to be written
8973 @return: The remaining time in seconds
8976 avg_time = time_taken / float(written)
8977 return (total_size - written) * avg_time
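# Worked example (units are whatever the caller uses): if 1024 MiB out of
# 4096 MiB were written in 60 seconds, avg_time is 60 / 1024.0 and the ETA is
# (4096 - 1024) * 60 / 1024.0 == 180 seconds.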
8980 def _WipeDisks(lu, instance, disks=None):
8981 """Wipes instance disks.
8983 @type lu: L{LogicalUnit}
8984 @param lu: the logical unit on whose behalf we execute
8985 @type instance: L{objects.Instance}
8986 @param instance: the instance whose disks we should wipe
8987 @return: the success of the wipe
8990 node = instance.primary_node
8993 disks = [(idx, disk, 0)
8994 for (idx, disk) in enumerate(instance.disks)]
8996 for (_, device, _) in disks:
8997 lu.cfg.SetDiskID(device, node)
8999 logging.info("Pausing synchronization of disks of instance '%s'",
9001 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9002 (map(compat.snd, disks),
9005 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9007 for idx, success in enumerate(result.payload):
9009 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9010 " failed", idx, instance.name)
9013 for (idx, device, offset) in disks:
9014 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9015 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9017 int(min(constants.MAX_WIPE_CHUNK,
9018 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
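# Worked example (assuming, for illustration, MIN_WIPE_CHUNK_PERCENT == 10
# and MAX_WIPE_CHUNK == 1024 MB): a 2048 MB disk is wiped in
# int(min(1024, 2048 * 0.10)) == 204 MB chunks, while a 102400 MB disk is
# capped at 1024 MB per call_blockdev_wipe request.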
9022 start_time = time.time()
9027 info_text = (" (from %s to %s)" %
9028 (utils.FormatUnit(offset, "h"),
9029 utils.FormatUnit(size, "h")))
9031 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9033 logging.info("Wiping disk %d for instance %s on node %s using"
9034 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9036 while offset < size:
9037 wipe_size = min(wipe_chunk_size, size - offset)
9039 logging.debug("Wiping disk %d, offset %s, chunk %s",
9040 idx, offset, wipe_size)
9042 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9044 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9045 (idx, offset, wipe_size))
9049 if now - last_output >= 60:
9050 eta = _CalcEta(now - start_time, offset, size)
9051 lu.LogInfo(" - done: %.1f%% ETA: %s",
9052 offset / float(size) * 100, utils.FormatSeconds(eta))
9055 logging.info("Resuming synchronization of disks for instance '%s'",
9058 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9059 (map(compat.snd, disks),
9064 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9065 node, result.fail_msg)
9067 for idx, success in enumerate(result.payload):
9069 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9070 " failed", idx, instance.name)
9073 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9074 """Create all disks for an instance.
9076 This abstracts away some work from AddInstance.
9078 @type lu: L{LogicalUnit}
9079 @param lu: the logical unit on whose behalf we execute
9080 @type instance: L{objects.Instance}
9081 @param instance: the instance whose disks we should create
9083 @param to_skip: list of indices to skip
9084 @type target_node: string
9085 @param target_node: if passed, overrides the target node for creation
9087 @return: the success of the creation
9090 info = _GetInstanceInfoText(instance)
9091 if target_node is None:
9092 pnode = instance.primary_node
9093 all_nodes = instance.all_nodes
9098 if instance.disk_template in constants.DTS_FILEBASED:
9099 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9100 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9102 result.Raise("Failed to create directory '%s' on"
9103 " node %s" % (file_storage_dir, pnode))
9105 # Note: this needs to be kept in sync with adding of disks in
9106 # LUInstanceSetParams
9107 for idx, device in enumerate(instance.disks):
9108 if to_skip and idx in to_skip:
9110 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9112 for node in all_nodes:
9113 f_create = node == pnode
9114 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9117 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9118 """Remove all disks for an instance.
9120 This abstracts away some work from `AddInstance()` and
9121 `RemoveInstance()`. Note that in case some of the devices couldn't
9122 be removed, the removal will continue with the other ones (compare
9123 with `_CreateDisks()`).
9125 @type lu: L{LogicalUnit}
9126 @param lu: the logical unit on whose behalf we execute
9127 @type instance: L{objects.Instance}
9128 @param instance: the instance whose disks we should remove
9129 @type target_node: string
9130 @param target_node: used to override the node on which to remove the disks
9132 @return: the success of the removal
9135 logging.info("Removing block devices for instance %s", instance.name)
9138 ports_to_release = set()
9139 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9140 for (idx, device) in enumerate(anno_disks):
9142 edata = [(target_node, device)]
9144 edata = device.ComputeNodeTree(instance.primary_node)
9145 for node, disk in edata:
9146 lu.cfg.SetDiskID(disk, node)
9147 result = lu.rpc.call_blockdev_remove(node, disk)
9149 lu.LogWarning("Could not remove disk %s on node %s,"
9150 " continuing anyway: %s", idx, node, result.fail_msg)
9151 if not (result.offline and node != instance.primary_node):
9154 # if this is a DRBD disk, return its port to the pool
9155 if device.dev_type in constants.LDS_DRBD:
9156 ports_to_release.add(device.logical_id[2])
9158 if all_result or ignore_failures:
9159 for port in ports_to_release:
9160 lu.cfg.AddTcpUdpPort(port)
9162 if instance.disk_template == constants.DT_FILE:
9163 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9167 tgt = instance.primary_node
9168 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9170 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9171 file_storage_dir, instance.primary_node, result.fail_msg)
9177 def _ComputeDiskSizePerVG(disk_template, disks):
9178 """Compute disk size requirements in the volume group
9181 def _compute(disks, payload):
9182 """Universal algorithm.
9187 vgs[disk[constants.IDISK_VG]] = \
9188 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9192 # Required free disk space as a function of disk and swap space
9194 constants.DT_DISKLESS: {},
9195 constants.DT_PLAIN: _compute(disks, 0),
9196 # 128 MB are added for drbd metadata for each disk
9197 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9198 constants.DT_FILE: {},
9199 constants.DT_SHARED_FILE: {},
9202 if disk_template not in req_size_dict:
9203 raise errors.ProgrammerError("Disk template '%s' size requirement"
9204 " is unknown" % disk_template)
9206 return req_size_dict[disk_template]
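# Illustrative example (VG name and sizes invented): for two DRBD8 disks in
# the same volume group,
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) maps each VG to the space
# it must provide, metadata included:
#   {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}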
9209 def _FilterVmNodes(lu, nodenames):
9210 """Filters out non-vm_capable nodes from a list.
9212 @type lu: L{LogicalUnit}
9213 @param lu: the logical unit for which we check
9214 @type nodenames: list
9215 @param nodenames: the list of nodes on which we should check
9217 @return: the list of vm-capable nodes
9220 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9221 return [name for name in nodenames if name not in non_vm_nodes]
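# Illustrative example (node names invented): if "node2" is the only
# non-vm_capable node in the cluster, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]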
9224 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9225 """Hypervisor parameter validation.
9227 This function abstracts the hypervisor parameter validation to be
9228 used in both instance create and instance modify.
9230 @type lu: L{LogicalUnit}
9231 @param lu: the logical unit for which we check
9232 @type nodenames: list
9233 @param nodenames: the list of nodes on which we should check
9234 @type hvname: string
9235 @param hvname: the name of the hypervisor we should use
9236 @type hvparams: dict
9237 @param hvparams: the parameters which we need to check
9238 @raise errors.OpPrereqError: if the parameters are not valid
9241 nodenames = _FilterVmNodes(lu, nodenames)
9243 cluster = lu.cfg.GetClusterInfo()
9244 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9246 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9247 for node in nodenames:
9251 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9254 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9255 """OS parameters validation.
9257 @type lu: L{LogicalUnit}
9258 @param lu: the logical unit for which we check
9259 @type required: boolean
9260 @param required: whether the validation should fail if the OS is not
9262 @type nodenames: list
9263 @param nodenames: the list of nodes on which we should check
9264 @type osname: string
9265 @param osname: the name of the OS we should use
9266 @type osparams: dict
9267 @param osparams: the parameters which we need to check
9268 @raise errors.OpPrereqError: if the parameters are not valid
9271 nodenames = _FilterVmNodes(lu, nodenames)
9272 result = lu.rpc.call_os_validate(nodenames, required, osname,
9273 [constants.OS_VALIDATE_PARAMETERS],
9275 for node, nres in result.items():
9276 # we don't check for offline cases since this should be run only
9277 # against the master node and/or an instance's nodes
9278 nres.Raise("OS Parameters validation failed on node %s" % node)
9279 if not nres.payload:
9280       lu.LogInfo("OS %s not found on node %s, validation skipped",
9281                  osname, node)
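# A minimal usage sketch for _CheckOSParams, assuming a "debootstrap+default"
# guest OS and one hypothetical OS parameter; required=True makes a missing OS
# definition on a checked node fatal instead of merely logged.
#
#   _CheckOSParams(self, True, nodenames, "debootstrap+default",
#                  {"dhcp": "yes"})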
9284 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9285 """Wrapper around IAReqInstanceAlloc.
9287 @param op: The instance opcode
9288 @param disks: The computed disks
9289 @param nics: The computed nics
9290   @param beparams: The fully filled beparams
9292 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9295 spindle_use = beparams[constants.BE_SPINDLE_USE]
9296 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9297                                        disk_template=op.disk_template,
9298                                        tags=op.tags,
9299                                        os=op.os_type,
9300                                        vcpus=beparams[constants.BE_VCPUS],
9301                                        memory=beparams[constants.BE_MAXMEM],
9302                                        spindle_use=spindle_use,
9303                                        disks=disks,
9304                                        nics=[n.ToDict() for n in nics],
9305 hypervisor=op.hypervisor)
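# A minimal sketch of how this wrapper is used when an iallocator has to place
# the instance: the opcode, the already-computed disks/nics and the filled
# beparams are bundled into a request and handed to the allocator (this mirrors
# the _RunAllocator flow further below).
#
#   req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics,
#                                     self.be_full)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)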
9308 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9309 """Computes the nics.
9311 @param op: The instance opcode
9312 @param cluster: Cluster configuration object
9313 @param default_ip: The default ip to assign
9314 @param cfg: An instance of the configuration object
9315   @param proc: The executor instance
9317   @returns: The built up nics
9320   nics = []
9321   for idx, nic in enumerate(op.nics):
9322 nic_mode_req = nic.get(constants.INIC_MODE, None)
9323 nic_mode = nic_mode_req
9324 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9325 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9327 # in routed mode, for the first nic, the default ip is 'auto'
9328 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9329 default_ip_mode = constants.VALUE_AUTO
9330     else:
9331       default_ip_mode = constants.VALUE_NONE
9333 # ip validity checks
9334 ip = nic.get(constants.INIC_IP, default_ip_mode)
9335     if ip is None or ip.lower() == constants.VALUE_NONE:
9336       nic_ip = None
9337     elif ip.lower() == constants.VALUE_AUTO:
9338       if not op.name_check:
9339         raise errors.OpPrereqError("IP address set to auto but name checks"
9340                                    " have been skipped",
9341                                    errors.ECODE_INVAL)
9342       nic_ip = default_ip
9343     else:
9344       if not netutils.IPAddress.IsValid(ip):
9345         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9346                                    errors.ECODE_INVAL)
9347       nic_ip = ip
9349 # TODO: check the ip address for uniqueness
9350 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9351 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9354 # MAC address verification
9355 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9356 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9357 mac = utils.NormalizeAndValidateMac(mac)
9359         try:
9360           # TODO: We need to factor this out
9361 cfg.ReserveMAC(mac, proc.GetECId())
9362 except errors.ReservationError:
9363 raise errors.OpPrereqError("MAC address %s already in use"
9364 " in cluster" % mac,
9365 errors.ECODE_NOTUNIQUE)
9367 # Build nic parameters
9368 link = nic.get(constants.INIC_LINK, None)
9369 if link == constants.VALUE_AUTO:
9370 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9371     nicparams = {}
9372     if nic_mode_req:
9373       nicparams[constants.NIC_MODE] = nic_mode
9374     if link:
9375       nicparams[constants.NIC_LINK] = link
9377 check_params = cluster.SimpleFillNIC(nicparams)
9378 objects.NIC.CheckParameterSyntax(check_params)
9379     nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9381   return nics
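# A minimal input/output sketch for _ComputeNics, assuming a single bridged NIC
# with an auto MAC; only the per-NIC overrides are kept in nicparams.
#
#   op.nics = [{constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
#               constants.INIC_MAC: constants.VALUE_AUTO}]
#   nics = _ComputeNics(op, cluster, None, cfg, proc)
#   # nics[0].mac is still constants.VALUE_AUTO at this point; the concrete
#   # address is only generated later (cfg.GenerateMAC in CheckPrereq).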
9384 def _ComputeDisks(op, default_vg):
9385 """Computes the instance disks.
9387 @param op: The instance opcode
9388 @param default_vg: The default_vg to assume
9390   @return: The computed disks
9393   disks = []
9394   for disk in op.disks:
9395 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9396 if mode not in constants.DISK_ACCESS_SET:
9397 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9398 mode, errors.ECODE_INVAL)
9399     size = disk.get(constants.IDISK_SIZE, None)
9400     if size is None:
9401       raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9402     try:
9403       size = int(size)
9404 except (TypeError, ValueError):
9405 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9408 data_vg = disk.get(constants.IDISK_VG, default_vg)
9409     new_disk = {
9410       constants.IDISK_SIZE: size,
9411       constants.IDISK_MODE: mode,
9412       constants.IDISK_VG: data_vg,
9413       }
9414 if constants.IDISK_METAVG in disk:
9415 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9416 if constants.IDISK_ADOPT in disk:
9417 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9418     disks.append(new_disk)
9420   return disks
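# A minimal sketch of the disk normalization above, assuming one 10 GB
# read-write disk on the hypothetical volume group "xenvg":
#
#   op.disks = [{constants.IDISK_SIZE: 10240}]
#   disks = _ComputeDisks(op, "xenvg")
#   # => [{constants.IDISK_SIZE: 10240,
#   #      constants.IDISK_MODE: constants.DISK_RDWR,
#   #      constants.IDISK_VG: "xenvg"}]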
9423 def _ComputeFullBeParams(op, cluster):
9424 """Computes the full beparams.
9426 @param op: The instance opcode
9427 @param cluster: The cluster config object
9429 @return: The fully filled beparams
9432 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9433 for param, value in op.beparams.iteritems():
9434 if value == constants.VALUE_AUTO:
9435 op.beparams[param] = default_beparams[param]
9436 objects.UpgradeBeParams(op.beparams)
9437 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9438 return cluster.SimpleFillBE(op.beparams)
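# A minimal sketch of the beparams merge, assuming the opcode only overrides
# maxmem; VALUE_AUTO entries fall back to the cluster defaults before the dict
# is type-checked and filled by SimpleFillBE.
#
#   op.beparams = {constants.BE_MAXMEM: 2048,
#                  constants.BE_VCPUS: constants.VALUE_AUTO}
#   be_full = _ComputeFullBeParams(op, cluster)
#   # be_full[constants.BE_VCPUS] now equals the cluster-wide default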
9441 class LUInstanceCreate(LogicalUnit):
9442 """Create an instance.
9445 HPATH = "instance-add"
9446 HTYPE = constants.HTYPE_INSTANCE
9449 def CheckArguments(self):
9453 # do not require name_check to ease forward/backward compatibility
9455 if self.op.no_install and self.op.start:
9456 self.LogInfo("No-installation mode selected, disabling startup")
9457 self.op.start = False
9458 # validate/normalize the instance name
9459 self.op.instance_name = \
9460 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9462 if self.op.ip_check and not self.op.name_check:
9463 # TODO: make the ip check more flexible and not depend on the name check
9464 raise errors.OpPrereqError("Cannot do IP address check without a name"
9465 " check", errors.ECODE_INVAL)
9467 # check nics' parameter names
9468 for nic in self.op.nics:
9469 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9471 # check disks. parameter names and consistent adopt/no-adopt strategy
9472 has_adopt = has_no_adopt = False
9473 for disk in self.op.disks:
9474 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9475       if constants.IDISK_ADOPT in disk:
9476         has_adopt = True
9477       else:
9478         has_no_adopt = True
9479 if has_adopt and has_no_adopt:
9480       raise errors.OpPrereqError("Either all disks are adopted or none is",
9481                                  errors.ECODE_INVAL)
9482     if has_adopt:
9483 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9484 raise errors.OpPrereqError("Disk adoption is not supported for the"
9485 " '%s' disk template" %
9486 self.op.disk_template,
9488 if self.op.iallocator is not None:
9489 raise errors.OpPrereqError("Disk adoption not allowed with an"
9490 " iallocator script", errors.ECODE_INVAL)
9491 if self.op.mode == constants.INSTANCE_IMPORT:
9492 raise errors.OpPrereqError("Disk adoption not allowed for"
9493 " instance import", errors.ECODE_INVAL)
9495 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9496 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9497 " but no 'adopt' parameter given" %
9498 self.op.disk_template,
9501 self.adopt_disks = has_adopt
9503 # instance name verification
9504 if self.op.name_check:
9505 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9506 self.op.instance_name = self.hostname1.name
9507 # used in CheckPrereq for ip ping check
9508 self.check_ip = self.hostname1.ip
9509     else:
9510       self.check_ip = None
9512 # file storage checks
9513 if (self.op.file_driver and
9514 not self.op.file_driver in constants.FILE_DRIVER):
9515 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9516 self.op.file_driver, errors.ECODE_INVAL)
9518 if self.op.disk_template == constants.DT_FILE:
9519 opcodes.RequireFileStorage()
9520 elif self.op.disk_template == constants.DT_SHARED_FILE:
9521 opcodes.RequireSharedFileStorage()
9523 ### Node/iallocator related checks
9524 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9526 if self.op.pnode is not None:
9527 if self.op.disk_template in constants.DTS_INT_MIRROR:
9528 if self.op.snode is None:
9529 raise errors.OpPrereqError("The networked disk templates need"
9530 " a mirror node", errors.ECODE_INVAL)
9531       else:
9532         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9533                         " template")
9534 self.op.snode = None
9536 self._cds = _GetClusterDomainSecret()
9538 if self.op.mode == constants.INSTANCE_IMPORT:
9539 # On import force_variant must be True, because if we forced it at
9540       # initial install, our only chance when importing it back is that it
9541       # works again!
9542 self.op.force_variant = True
9544 if self.op.no_install:
9545 self.LogInfo("No-installation mode has no effect during import")
9547 elif self.op.mode == constants.INSTANCE_CREATE:
9548 if self.op.os_type is None:
9549 raise errors.OpPrereqError("No guest OS specified",
9551 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9552 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9553 " installation" % self.op.os_type,
9555 if self.op.disk_template is None:
9556 raise errors.OpPrereqError("No disk template specified",
9559 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9560 # Check handshake to ensure both clusters have the same domain secret
9561 src_handshake = self.op.source_handshake
9562 if not src_handshake:
9563 raise errors.OpPrereqError("Missing source handshake",
9566       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9567                                                            src_handshake)
9568       if errmsg:
9569         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9570                                    errors.ECODE_INVAL)
9572 # Load and check source CA
9573 self.source_x509_ca_pem = self.op.source_x509_ca
9574 if not self.source_x509_ca_pem:
9575 raise errors.OpPrereqError("Missing source X509 CA",
9578       try:
9579         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9580                                                     self._cds)
9581 except OpenSSL.crypto.Error, err:
9582 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9583 (err, ), errors.ECODE_INVAL)
9585 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9586 if errcode is not None:
9587 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9590 self.source_x509_ca = cert
9592 src_instance_name = self.op.source_instance_name
9593 if not src_instance_name:
9594 raise errors.OpPrereqError("Missing source instance name",
9597 self.source_instance_name = \
9598 netutils.GetHostname(name=src_instance_name).name
9600     else:
9601       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9602 self.op.mode, errors.ECODE_INVAL)
9604 def ExpandNames(self):
9605 """ExpandNames for CreateInstance.
9607 Figure out the right locks for instance creation.
9610 self.needed_locks = {}
9612 instance_name = self.op.instance_name
9613 # this is just a preventive check, but someone might still add this
9614 # instance in the meantime, and creation will fail at lock-add time
9615 if instance_name in self.cfg.GetInstanceList():
9616 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9617 instance_name, errors.ECODE_EXISTS)
9619 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9621 if self.op.iallocator:
9622 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9623 # specifying a group on instance creation and then selecting nodes from
9625 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9626 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9628 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9629 nodelist = [self.op.pnode]
9630 if self.op.snode is not None:
9631 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9632 nodelist.append(self.op.snode)
9633 self.needed_locks[locking.LEVEL_NODE] = nodelist
9634 # Lock resources of instance's primary and secondary nodes (copy to
9635       # prevent accidental modification)
9636 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9638 # in case of import lock the source node too
9639 if self.op.mode == constants.INSTANCE_IMPORT:
9640 src_node = self.op.src_node
9641 src_path = self.op.src_path
9643 if src_path is None:
9644 self.op.src_path = src_path = self.op.instance_name
9646 if src_node is None:
9647 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9648 self.op.src_node = None
9649 if os.path.isabs(src_path):
9650 raise errors.OpPrereqError("Importing an instance from a path"
9651                                      " requires a source node option",
9652                                      errors.ECODE_INVAL)
9653       else:
9654         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9655 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9656 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9657 if not os.path.isabs(src_path):
9658 self.op.src_path = src_path = \
9659 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9661 def _RunAllocator(self):
9662 """Run the allocator based on input opcode.
9665 req = _CreateInstanceAllocRequest(self.op, self.disks,
9666 self.nics, self.be_full)
9667 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9669 ial.Run(self.op.iallocator)
9671     if not ial.success:
9672       raise errors.OpPrereqError("Can't compute nodes using"
9673                                  " iallocator '%s': %s" %
9674                                  (self.op.iallocator, ial.info),
9675                                  errors.ECODE_NORES)
9676 self.op.pnode = ial.result[0]
9677 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9678 self.op.instance_name, self.op.iallocator,
9679 utils.CommaJoin(ial.result))
9681 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9683 if req.RequiredNodes() == 2:
9684 self.op.snode = ial.result[1]
9686 def BuildHooksEnv(self):
9689 This runs on master, primary and secondary nodes of the instance.
9692     env = {
9693       "ADD_MODE": self.op.mode,
9694       }
9695 if self.op.mode == constants.INSTANCE_IMPORT:
9696 env["SRC_NODE"] = self.op.src_node
9697 env["SRC_PATH"] = self.op.src_path
9698 env["SRC_IMAGES"] = self.src_images
9700 env.update(_BuildInstanceHookEnv(
9701 name=self.op.instance_name,
9702 primary_node=self.op.pnode,
9703 secondary_nodes=self.secondaries,
9704 status=self.op.start,
9705 os_type=self.op.os_type,
9706 minmem=self.be_full[constants.BE_MINMEM],
9707 maxmem=self.be_full[constants.BE_MAXMEM],
9708 vcpus=self.be_full[constants.BE_VCPUS],
9709 nics=_NICListToTuple(self, self.nics),
9710 disk_template=self.op.disk_template,
9711 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9712 for d in self.disks],
9713       bep=self.be_full,
9714       hvp=self.hv_full,
9715       hypervisor_name=self.op.hypervisor,
9716       tags=self.op.tags,
9717     ))
9719     return env
9721 def BuildHooksNodes(self):
9722 """Build hooks nodes.
9725     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9726     return nl, nl
9728 def _ReadExportInfo(self):
9729 """Reads the export information from disk.
9731 It will override the opcode source node and path with the actual
9732 information, if these two were not specified before.
9734 @return: the export information
9737 assert self.op.mode == constants.INSTANCE_IMPORT
9739 src_node = self.op.src_node
9740 src_path = self.op.src_path
9742 if src_node is None:
9743 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9744 exp_list = self.rpc.call_export_list(locked_nodes)
9746 for node in exp_list:
9747         if exp_list[node].fail_msg:
9748           continue
9749 if src_path in exp_list[node].payload:
9751 self.op.src_node = src_node = node
9752 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9756 raise errors.OpPrereqError("No export found for relative path %s" %
9757 src_path, errors.ECODE_INVAL)
9759 _CheckNodeOnline(self, src_node)
9760 result = self.rpc.call_export_info(src_node, src_path)
9761 result.Raise("No export or invalid export found in dir %s" % src_path)
9763 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9764 if not export_info.has_section(constants.INISECT_EXP):
9765 raise errors.ProgrammerError("Corrupted export config",
9766 errors.ECODE_ENVIRON)
9768 ei_version = export_info.get(constants.INISECT_EXP, "version")
9769 if (int(ei_version) != constants.EXPORT_VERSION):
9770 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9771 (ei_version, constants.EXPORT_VERSION),
9772                                  errors.ECODE_ENVIRON)
9774     return export_info
9775 def _ReadExportParams(self, einfo):
9776 """Use export parameters as defaults.
9778 In case the opcode doesn't specify (as in override) some instance
9779     parameters, then try to use them from the export information, if
9780     the export declares them.
9783 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9785 if self.op.disk_template is None:
9786 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9787 self.op.disk_template = einfo.get(constants.INISECT_INS,
9789 if self.op.disk_template not in constants.DISK_TEMPLATES:
9790 raise errors.OpPrereqError("Disk template specified in configuration"
9791 " file is not one of the allowed values:"
9793 " ".join(constants.DISK_TEMPLATES),
9796 raise errors.OpPrereqError("No disk template specified and the export"
9797 " is missing the disk_template information",
9800 if not self.op.disks:
9801       disks = []
9802       # TODO: import the disk iv_name too
9803 for idx in range(constants.MAX_DISKS):
9804 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9805 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9806 disks.append({constants.IDISK_SIZE: disk_sz})
9807 self.op.disks = disks
9808 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9809 raise errors.OpPrereqError("No disk info specified and the export"
9810 " is missing the disk information",
9813 if not self.op.nics:
9815 for idx in range(constants.MAX_NICS):
9816 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9818 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9819 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9826 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9827 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9829 if (self.op.hypervisor is None and
9830 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9831 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9833 if einfo.has_section(constants.INISECT_HYP):
9834 # use the export parameters but do not override the ones
9835 # specified by the user
9836 for name, value in einfo.items(constants.INISECT_HYP):
9837 if name not in self.op.hvparams:
9838 self.op.hvparams[name] = value
9840 if einfo.has_section(constants.INISECT_BEP):
9841 # use the parameters, without overriding
9842 for name, value in einfo.items(constants.INISECT_BEP):
9843 if name not in self.op.beparams:
9844 self.op.beparams[name] = value
9845 # Compatibility for the old "memory" be param
9846 if name == constants.BE_MEMORY:
9847 if constants.BE_MAXMEM not in self.op.beparams:
9848 self.op.beparams[constants.BE_MAXMEM] = value
9849 if constants.BE_MINMEM not in self.op.beparams:
9850 self.op.beparams[constants.BE_MINMEM] = value
9852 # try to read the parameters old style, from the main section
9853 for name in constants.BES_PARAMETERS:
9854 if (name not in self.op.beparams and
9855 einfo.has_option(constants.INISECT_INS, name)):
9856 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9858 if einfo.has_section(constants.INISECT_OSP):
9859 # use the parameters, without overriding
9860 for name, value in einfo.items(constants.INISECT_OSP):
9861 if name not in self.op.osparams:
9862 self.op.osparams[name] = value
9864 def _RevertToDefaults(self, cluster):
9865 """Revert the instance parameters to the default values.
9869 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9870 for name in self.op.hvparams.keys():
9871 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9872 del self.op.hvparams[name]
9874 be_defs = cluster.SimpleFillBE({})
9875 for name in self.op.beparams.keys():
9876 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9877 del self.op.beparams[name]
9879 nic_defs = cluster.SimpleFillNIC({})
9880 for nic in self.op.nics:
9881 for name in constants.NICS_PARAMETERS:
9882         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9883           del nic[name]
9885 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9886 for name in self.op.osparams.keys():
9887 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9888 del self.op.osparams[name]
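  # A minimal sketch of what identify_defaults achieves via _RevertToDefaults,
  # assuming a hypothetical "kernel_path" hvparam whose given value matches the
  # cluster default: the entry is dropped again so the new instance keeps
  # following the cluster value instead of freezing it as a per-instance
  # override.
  #
  #   hv_defs = {"kernel_path": "/boot/vmlinuz"}           # cluster default
  #   self.op.hvparams = {"kernel_path": "/boot/vmlinuz"}  # same value given
  #   self._RevertToDefaults(cluster)
  #   # self.op.hvparams == {}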
9890 def _CalculateFileStorageDir(self):
9891 """Calculate final instance file storage dir.
9894 # file storage dir calculation/check
9895 self.instance_file_storage_dir = None
9896 if self.op.disk_template in constants.DTS_FILEBASED:
9897 # build the full file storage dir path
9898       joinargs = []
9900       if self.op.disk_template == constants.DT_SHARED_FILE:
9901         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9902       else:
9903         get_fsd_fn = self.cfg.GetFileStorageDir
9905 cfg_storagedir = get_fsd_fn()
9906 if not cfg_storagedir:
9907 raise errors.OpPrereqError("Cluster file storage dir not defined",
9909 joinargs.append(cfg_storagedir)
9911 if self.op.file_storage_dir is not None:
9912 joinargs.append(self.op.file_storage_dir)
9914 joinargs.append(self.op.instance_name)
9916 # pylint: disable=W0142
9917 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
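  # A worked path example for the joinargs built above, assuming a cluster file
  # storage dir of /srv/ganeti/file-storage, an opcode-level subdirectory "web"
  # and an instance named "inst1.example.com":
  #
  #   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
  #   # => "/srv/ganeti/file-storage/web/inst1.example.com"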
9919 def CheckPrereq(self): # pylint: disable=R0914
9920 """Check prerequisites.
9923 self._CalculateFileStorageDir()
9925 if self.op.mode == constants.INSTANCE_IMPORT:
9926 export_info = self._ReadExportInfo()
9927 self._ReadExportParams(export_info)
9928 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9930 self._old_instance_name = None
9932 if (not self.cfg.GetVGName() and
9933 self.op.disk_template not in constants.DTS_NOT_LVM):
9934 raise errors.OpPrereqError("Cluster does not support lvm-based"
9935 " instances", errors.ECODE_STATE)
9937 if (self.op.hypervisor is None or
9938 self.op.hypervisor == constants.VALUE_AUTO):
9939 self.op.hypervisor = self.cfg.GetHypervisorType()
9941 cluster = self.cfg.GetClusterInfo()
9942 enabled_hvs = cluster.enabled_hypervisors
9943 if self.op.hypervisor not in enabled_hvs:
9944 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9945                                  " cluster (%s)" %
9946                                  (self.op.hypervisor, ",".join(enabled_hvs)),
9947                                  errors.ECODE_STATE)
9949 # Check tag validity
9950 for tag in self.op.tags:
9951 objects.TaggableObject.ValidateTag(tag)
9953 # check hypervisor parameter syntax (locally)
9954 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9955 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9956                                       self.op.hvparams)
9957     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9958 hv_type.CheckParameterSyntax(filled_hvp)
9959 self.hv_full = filled_hvp
9960 # check that we don't specify global parameters on an instance
9961 _CheckGlobalHvParams(self.op.hvparams)
9963 # fill and remember the beparams dict
9964 self.be_full = _ComputeFullBeParams(self.op, cluster)
9966 # build os parameters
9967 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9969 # now that hvp/bep are in final format, let's reset to defaults,
9971 if self.op.identify_defaults:
9972 self._RevertToDefaults(cluster)
9975     self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
9976                              self.proc)
9978 # disk checks/pre-build
9979 default_vg = self.cfg.GetVGName()
9980 self.disks = _ComputeDisks(self.op, default_vg)
9982     if self.op.mode == constants.INSTANCE_IMPORT:
9983       disk_images = []
9984 for idx in range(len(self.disks)):
9985 option = "disk%d_dump" % idx
9986 if export_info.has_option(constants.INISECT_INS, option):
9987 # FIXME: are the old os-es, disk sizes, etc. useful?
9988 export_name = export_info.get(constants.INISECT_INS, option)
9989 image = utils.PathJoin(self.op.src_path, export_name)
9990 disk_images.append(image)
9991         else:
9992           disk_images.append(False)
9994 self.src_images = disk_images
9996 if self.op.instance_name == self._old_instance_name:
9997 for idx, nic in enumerate(self.nics):
9998 if nic.mac == constants.VALUE_AUTO:
9999 nic_mac_ini = "nic%d_mac" % idx
10000 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10002 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10004 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10005 if self.op.ip_check:
10006 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10007 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10008 (self.check_ip, self.op.instance_name),
10009 errors.ECODE_NOTUNIQUE)
10011 #### mac address generation
10012 # By generating here the mac address both the allocator and the hooks get
10013 # the real final mac address rather than the 'auto' or 'generate' value.
10014 # There is a race condition between the generation and the instance object
10015 # creation, which means that we know the mac is valid now, but we're not
10016 # sure it will be when we actually add the instance. If things go bad
10017 # adding the instance will abort because of a duplicate mac, and the
10018 # creation job will fail.
10019 for nic in self.nics:
10020 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10021 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10025 if self.op.iallocator is not None:
10026 self._RunAllocator()
10028 # Release all unneeded node locks
10029 _ReleaseLocks(self, locking.LEVEL_NODE,
10030 keep=filter(None, [self.op.pnode, self.op.snode,
10031 self.op.src_node]))
10032 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10033 keep=filter(None, [self.op.pnode, self.op.snode,
10034 self.op.src_node]))
10036 #### node related checks
10038 # check primary node
10039 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10040 assert self.pnode is not None, \
10041 "Cannot retrieve locked node %s" % self.op.pnode
10042     if pnode.offline:
10043       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10044                                  pnode.name, errors.ECODE_STATE)
10045     if pnode.drained:
10046       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10047                                  pnode.name, errors.ECODE_STATE)
10048 if not pnode.vm_capable:
10049 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10050 " '%s'" % pnode.name, errors.ECODE_STATE)
10052 self.secondaries = []
10054 # mirror node verification
10055 if self.op.disk_template in constants.DTS_INT_MIRROR:
10056 if self.op.snode == pnode.name:
10057 raise errors.OpPrereqError("The secondary node cannot be the"
10058 " primary node", errors.ECODE_INVAL)
10059 _CheckNodeOnline(self, self.op.snode)
10060 _CheckNodeNotDrained(self, self.op.snode)
10061 _CheckNodeVmCapable(self, self.op.snode)
10062 self.secondaries.append(self.op.snode)
10064 snode = self.cfg.GetNodeInfo(self.op.snode)
10065 if pnode.group != snode.group:
10066 self.LogWarning("The primary and secondary nodes are in two"
10067 " different node groups; the disk parameters"
10068                         " from the first disk's node group will be"
10069                         " used")
10071 nodenames = [pnode.name] + self.secondaries
10073 # Verify instance specs
10074 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10075     ispec = {
10076       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10077       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10078       constants.ISPEC_DISK_COUNT: len(self.disks),
10079       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10080       constants.ISPEC_NIC_COUNT: len(self.nics),
10081       constants.ISPEC_SPINDLE_USE: spindle_use,
10082       }
10084 group_info = self.cfg.GetNodeGroup(pnode.group)
10085 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10086 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10087 if not self.op.ignore_ipolicy and res:
10088 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10089 (pnode.group, group_info.name, utils.CommaJoin(res)))
10090 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10092 if not self.adopt_disks:
10093 if self.op.disk_template == constants.DT_RBD:
10094 # _CheckRADOSFreeSpace() is just a placeholder.
10095 # Any function that checks prerequisites can be placed here.
10096 # Check if there is enough space on the RADOS cluster.
10097 _CheckRADOSFreeSpace()
10099 # Check lv size requirements, if not adopting
10100 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10101 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10103 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10104 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10105 disk[constants.IDISK_ADOPT])
10106 for disk in self.disks])
10107 if len(all_lvs) != len(self.disks):
10108 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10109 errors.ECODE_INVAL)
10110 for lv_name in all_lvs:
10111         try:
10112           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10113 # to ReserveLV uses the same syntax
10114 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10115 except errors.ReservationError:
10116 raise errors.OpPrereqError("LV named %s used by another instance" %
10117 lv_name, errors.ECODE_NOTUNIQUE)
10119 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10120 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10122 node_lvs = self.rpc.call_lv_list([pnode.name],
10123 vg_names.payload.keys())[pnode.name]
10124 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10125 node_lvs = node_lvs.payload
10127 delta = all_lvs.difference(node_lvs.keys())
10128         if delta:
10129           raise errors.OpPrereqError("Missing logical volume(s): %s" %
10130 utils.CommaJoin(delta),
10131 errors.ECODE_INVAL)
10132 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10133         if online_lvs:
10134           raise errors.OpPrereqError("Online logical volumes found, cannot"
10135 " adopt: %s" % utils.CommaJoin(online_lvs),
10136 errors.ECODE_STATE)
10137 # update the size of disk based on what is found
10138 for dsk in self.disks:
10139 dsk[constants.IDISK_SIZE] = \
10140 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10141 dsk[constants.IDISK_ADOPT])][0]))
10143 elif self.op.disk_template == constants.DT_BLOCK:
10144 # Normalize and de-duplicate device paths
10145 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10146 for disk in self.disks])
10147 if len(all_disks) != len(self.disks):
10148 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10149 errors.ECODE_INVAL)
10150 baddisks = [d for d in all_disks
10151 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10152         if baddisks:
10153           raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10154 " cannot be adopted" %
10155 (", ".join(baddisks),
10156 constants.ADOPTABLE_BLOCKDEV_ROOT),
10157 errors.ECODE_INVAL)
10159 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10160 list(all_disks))[pnode.name]
10161         node_disks.Raise("Cannot get block device information from node %s" %
10162                          pnode.name)
10163 node_disks = node_disks.payload
10164 delta = all_disks.difference(node_disks.keys())
10165         if delta:
10166           raise errors.OpPrereqError("Missing block device(s): %s" %
10167 utils.CommaJoin(delta),
10168 errors.ECODE_INVAL)
10169 for dsk in self.disks:
10170 dsk[constants.IDISK_SIZE] = \
10171 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10173 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10175 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10176 # check OS parameters (remotely)
10177 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10179 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10181 # memory check on primary node
10182     #TODO(dynmem): use MINMEM for checking
10183     if self.op.start:
10184       _CheckNodeFreeMemory(self, self.pnode.name,
10185 "creating instance %s" % self.op.instance_name,
10186 self.be_full[constants.BE_MAXMEM],
10187 self.op.hypervisor)
10189 self.dry_run_result = list(nodenames)
10191 def Exec(self, feedback_fn):
10192 """Create and add the instance to the cluster.
10195 instance = self.op.instance_name
10196 pnode_name = self.pnode.name
10198 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10199 self.owned_locks(locking.LEVEL_NODE)), \
10200 "Node locks differ from node resource locks"
10202 ht_kind = self.op.hypervisor
10203 if ht_kind in constants.HTS_REQ_PORT:
10204 network_port = self.cfg.AllocatePort()
10206 network_port = None
10208 # This is ugly but we got a chicken-egg problem here
10209 # We can only take the group disk parameters, as the instance
10210 # has no disks yet (we are generating them right here).
10211 node = self.cfg.GetNodeInfo(pnode_name)
10212 nodegroup = self.cfg.GetNodeGroup(node.group)
10213 disks = _GenerateDiskTemplate(self,
10214 self.op.disk_template,
10215 instance, pnode_name,
10218 self.instance_file_storage_dir,
10219 self.op.file_driver,
10222 self.cfg.GetGroupDiskParams(nodegroup))
10224 iobj = objects.Instance(name=instance, os=self.op.os_type,
10225 primary_node=pnode_name,
10226 nics=self.nics, disks=disks,
10227 disk_template=self.op.disk_template,
10228 admin_state=constants.ADMINST_DOWN,
10229 network_port=network_port,
10230 beparams=self.op.beparams,
10231 hvparams=self.op.hvparams,
10232 hypervisor=self.op.hypervisor,
10233 osparams=self.op.osparams,
10237 for tag in self.op.tags:
10240 if self.adopt_disks:
10241 if self.op.disk_template == constants.DT_PLAIN:
10242 # rename LVs to the newly-generated names; we need to construct
10243 # 'fake' LV disks with the old data, plus the new unique_id
10244 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10245         rename_to = []
10246         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10247 rename_to.append(t_dsk.logical_id)
10248 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10249 self.cfg.SetDiskID(t_dsk, pnode_name)
10250 result = self.rpc.call_blockdev_rename(pnode_name,
10251 zip(tmp_disks, rename_to))
10252         result.Raise("Failed to rename adopted LVs")
10254     feedback_fn("* creating instance disks...")
10255     try:
10256       _CreateDisks(self, iobj)
10257     except errors.OpExecError:
10258       self.LogWarning("Device creation failed, reverting...")
10259       try:
10260         _RemoveDisks(self, iobj)
10261       finally:
10262         self.cfg.ReleaseDRBDMinors(instance)
10263         raise
10265 feedback_fn("adding instance %s to cluster config" % instance)
10267 self.cfg.AddInstance(iobj, self.proc.GetECId())
10269 # Declare that we don't want to remove the instance lock anymore, as we've
10270 # added the instance to the config
10271 del self.remove_locks[locking.LEVEL_INSTANCE]
10273 if self.op.mode == constants.INSTANCE_IMPORT:
10274 # Release unused nodes
10275 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10276     else:
10277       # Release all nodes
10278 _ReleaseLocks(self, locking.LEVEL_NODE)
10280     disk_abort = False
10281     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10282       feedback_fn("* wiping instance disks...")
10283       try:
10284         _WipeDisks(self, iobj)
10285       except errors.OpExecError, err:
10286         logging.exception("Wiping disks failed")
10287         self.LogWarning("Wiping instance disks failed (%s)", err)
10288         disk_abort = True
10290     if disk_abort:
10291       # Something is already wrong with the disks, don't do anything else
10292       pass
10293     elif self.op.wait_for_sync:
10294 disk_abort = not _WaitForSync(self, iobj)
10295 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10296 # make sure the disks are not degraded (still sync-ing is ok)
10297 feedback_fn("* checking mirrors status")
10298 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10299     else:
10300       disk_abort = False
10302     if disk_abort:
10303       _RemoveDisks(self, iobj)
10304 self.cfg.RemoveInstance(iobj.name)
10305 # Make sure the instance lock gets removed
10306 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10307       raise errors.OpExecError("There are some degraded disks for"
10308                                " this instance")
10310 # Release all node resource locks
10311 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10313 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10314 # we need to set the disks ID to the primary node, since the
10315 # preceding code might or might have not done it, depending on
10316 # disk template and other options
10317 for disk in iobj.disks:
10318 self.cfg.SetDiskID(disk, pnode_name)
10319 if self.op.mode == constants.INSTANCE_CREATE:
10320 if not self.op.no_install:
10321 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10322 not self.op.wait_for_sync)
10323         if pause_sync:
10324           feedback_fn("* pausing disk sync to install instance OS")
10325 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10328           for idx, success in enumerate(result.payload):
10329             if not success:
10330               logging.warn("pause-sync of instance %s for disk %d failed",
10331                            instance, idx)
10333 feedback_fn("* running the instance OS create scripts...")
10334 # FIXME: pass debug option from opcode to backend
10335         os_add_result = \
10336           self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10337 self.op.debug_level)
10339 feedback_fn("* resuming disk sync")
10340 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10343           for idx, success in enumerate(result.payload):
10344             if not success:
10345               logging.warn("resume-sync of instance %s for disk %d failed",
10346                            instance, idx)
10348 os_add_result.Raise("Could not add os for instance %s"
10349 " on node %s" % (instance, pnode_name))
10352 if self.op.mode == constants.INSTANCE_IMPORT:
10353 feedback_fn("* running the instance OS import scripts...")
10357 for idx, image in enumerate(self.src_images):
10361 # FIXME: pass debug option from opcode to backend
10362 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10363 constants.IEIO_FILE, (image, ),
10364 constants.IEIO_SCRIPT,
10365 (iobj.disks[idx], idx),
10367 transfers.append(dt)
10369         import_result = \
10370           masterd.instance.TransferInstanceData(self, feedback_fn,
10371                                                 self.op.src_node, pnode_name,
10372                                                 self.pnode.secondary_ip,
10373                                                 iobj, transfers)
10374 if not compat.all(import_result):
10375 self.LogWarning("Some disks for instance %s on node %s were not"
10376 " imported successfully" % (instance, pnode_name))
10378 rename_from = self._old_instance_name
10380 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10381 feedback_fn("* preparing remote import...")
10382 # The source cluster will stop the instance before attempting to make
10383 # a connection. In some cases stopping an instance can take a long
10384 # time, hence the shutdown timeout is added to the connection
10386 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10387 self.op.source_shutdown_timeout)
10388 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10390 assert iobj.primary_node == self.pnode.name
10391         disk_results = \
10392           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10393 self.source_x509_ca,
10394 self._cds, timeouts)
10395 if not compat.all(disk_results):
10396 # TODO: Should the instance still be started, even if some disks
10397 # failed to import (valid for local imports, too)?
10398 self.LogWarning("Some disks for instance %s on node %s were not"
10399 " imported successfully" % (instance, pnode_name))
10401 rename_from = self.source_instance_name
10404 # also checked in the prereq part
10405 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10408 # Run rename script on newly imported instance
10409 assert iobj.name == instance
10410 feedback_fn("Running rename script for %s" % instance)
10411       result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10412                                                  rename_from,
10413                                                  self.op.debug_level)
10414 if result.fail_msg:
10415 self.LogWarning("Failed to run rename script for %s on node"
10416 " %s: %s" % (instance, pnode_name, result.fail_msg))
10418 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10420     if self.op.start:
10421       iobj.admin_state = constants.ADMINST_UP
10422 self.cfg.Update(iobj, feedback_fn)
10423 logging.info("Starting instance %s on node %s", instance, pnode_name)
10424 feedback_fn("* starting instance...")
10425       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10426                                             False)
10427 result.Raise("Could not start instance")
10429 return list(iobj.all_nodes)
10432 class LUInstanceMultiAlloc(NoHooksLU):
10433 """Allocates multiple instances at the same time.
10438 def CheckArguments(self):
10439 """Check arguments.
10442     nodes = []
10443     for inst in self.op.instances:
10444 if inst.iallocator is not None:
10445         raise errors.OpPrereqError("iallocator is not allowed to be set on"
10446                                    " instance objects", errors.ECODE_INVAL)
10447 nodes.append(bool(inst.pnode))
10448 if inst.disk_template in constants.DTS_INT_MIRROR:
10449 nodes.append(bool(inst.snode))
10451 has_nodes = compat.any(nodes)
10452 if compat.all(nodes) ^ has_nodes:
10453 raise errors.OpPrereqError("There are instance objects providing"
10454 " pnode/snode while others do not",
10455 errors.ECODE_INVAL)
10457 if self.op.iallocator is None:
10458 default_iallocator = self.cfg.GetDefaultIAllocator()
10459 if default_iallocator and has_nodes:
10460 self.op.iallocator = default_iallocator
10461       else:
10462         raise errors.OpPrereqError("No iallocator or nodes on the instances"
10463 " given and no cluster-wide default"
10464 " iallocator found; please specify either"
10465 " an iallocator or nodes on the instances"
10466 " or set a cluster-wide default iallocator",
10467 errors.ECODE_INVAL)
10469 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10470     if dups:
10471       raise errors.OpPrereqError("There are duplicate instance names: %s" %
10472 utils.CommaJoin(dups), errors.ECODE_INVAL)
10474 def ExpandNames(self):
10475 """Calculate the locks.
10478 self.share_locks = _ShareAll()
10479 self.needed_locks = {}
10481 if self.op.iallocator:
10482 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10483 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10484     else:
10485       nodeslist = []
10486       for inst in self.op.instances:
10487 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10488 nodeslist.append(inst.pnode)
10489 if inst.snode is not None:
10490 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10491 nodeslist.append(inst.snode)
10493 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10494 # Lock resources of instance's primary and secondary nodes (copy to
10495       # prevent accidental modification)
10496 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10498 def CheckPrereq(self):
10499 """Check prerequisite.
10502 cluster = self.cfg.GetClusterInfo()
10503 default_vg = self.cfg.GetVGName()
10504 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10505 _ComputeNics(op, cluster, None,
10506 self.cfg, self.proc),
10507 _ComputeFullBeParams(op, cluster))
10508 for op in self.op.instances]
10509 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10510 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10512 ial.Run(self.op.iallocator)
10514 if not ial.success:
10515 raise errors.OpPrereqError("Can't compute nodes using"
10516 " iallocator '%s': %s" %
10517 (self.op.iallocator, ial.info),
10518 errors.ECODE_NORES)
10520 self.ia_result = ial.result
10522 if self.op.dry_run:
10523       self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10524 constants.JOB_IDS_KEY: [],
10527 def _ConstructPartialResult(self):
10528     """Constructs the partial result.
10531 (allocatable, failed) = self.ia_result
10532     return {
10533       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10534         map(compat.fst, allocatable),
10535       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10536       }
10538 def Exec(self, feedback_fn):
10539 """Executes the opcode.
10542 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10543 (allocatable, failed) = self.ia_result
10544     jobs = []
10546     for (name, nodes) in allocatable:
10547       op = op2inst.pop(name)
10549       if len(nodes) > 1:
10550         (op.pnode, op.snode) = nodes
10551       else:
10552         (op.pnode,) = nodes
10554       jobs.append([op])
10556 missing = set(op2inst.keys()) - set(failed)
10557 assert not missing, \
10558 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10560 return ResultWithJobs(jobs, **self._ConstructPartialResult())
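  # A minimal sketch of the result shape produced above, assuming two requested
  # instances of which only one could be placed by the iallocator; the job IDs
  # for the allocatable ones are filled in by mcpu when the jobs are submitted.
  #
  #   {
  #     constants.JOB_IDS_KEY: [...],
  #     opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1.example.com"],
  #     opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst2.example.com"],
  #   }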
10563 def _CheckRADOSFreeSpace():
10564 """Compute disk size requirements inside the RADOS cluster.
10567   # For the RADOS cluster we assume there is always enough space.
10568   pass
10571 class LUInstanceConsole(NoHooksLU):
10572 """Connect to an instance's console.
10574 This is somewhat special in that it returns the command line that
10575 you need to run on the master node in order to connect to the
10581 def ExpandNames(self):
10582 self.share_locks = _ShareAll()
10583 self._ExpandAndLockInstance()
10585 def CheckPrereq(self):
10586 """Check prerequisites.
10588 This checks that the instance is in the cluster.
10591 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10592 assert self.instance is not None, \
10593 "Cannot retrieve locked instance %s" % self.op.instance_name
10594 _CheckNodeOnline(self, self.instance.primary_node)
10596 def Exec(self, feedback_fn):
10597 """Connect to the console of an instance
10600 instance = self.instance
10601 node = instance.primary_node
10603 node_insts = self.rpc.call_instance_list([node],
10604 [instance.hypervisor])[node]
10605 node_insts.Raise("Can't get node information from %s" % node)
10607 if instance.name not in node_insts.payload:
10608 if instance.admin_state == constants.ADMINST_UP:
10609 state = constants.INSTST_ERRORDOWN
10610 elif instance.admin_state == constants.ADMINST_DOWN:
10611 state = constants.INSTST_ADMINDOWN
10613 state = constants.INSTST_ADMINOFFLINE
10614 raise errors.OpExecError("Instance %s is not running (state %s)" %
10615 (instance.name, state))
10617 logging.debug("Connecting to console of %s on %s", instance.name, node)
10619 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10622 def _GetInstanceConsole(cluster, instance):
10623 """Returns console information for an instance.
10625 @type cluster: L{objects.Cluster}
10626 @type instance: L{objects.Instance}
10630 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10631 # beparams and hvparams are passed separately, to avoid editing the
10632 # instance and then saving the defaults in the instance itself.
10633 hvparams = cluster.FillHV(instance)
10634 beparams = cluster.FillBE(instance)
10635 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10637 assert console.instance == instance.name
10638 assert console.Validate()
10640 return console.ToDict()
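# A minimal usage sketch for _GetInstanceConsole, e.g. from a caller that
# already holds the cluster and instance objects; the returned dict is a
# serialized objects.InstanceConsole.
#
#   console = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
#   # console["instance"] == instance.name; console["kind"] names the backend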
10643 class LUInstanceReplaceDisks(LogicalUnit):
10644 """Replace the disks of an instance.
10647 HPATH = "mirrors-replace"
10648 HTYPE = constants.HTYPE_INSTANCE
10651 def CheckArguments(self):
10652 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10653 self.op.iallocator)
10655 def ExpandNames(self):
10656 self._ExpandAndLockInstance()
10658 assert locking.LEVEL_NODE not in self.needed_locks
10659 assert locking.LEVEL_NODE_RES not in self.needed_locks
10660 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10662 assert self.op.iallocator is None or self.op.remote_node is None, \
10663 "Conflicting options"
10665 if self.op.remote_node is not None:
10666 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10668 # Warning: do not remove the locking of the new secondary here
10669 # unless DRBD8.AddChildren is changed to work in parallel;
10670 # currently it doesn't since parallel invocations of
10671 # FindUnusedMinor will conflict
10672 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10675 self.needed_locks[locking.LEVEL_NODE] = []
10676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10678 if self.op.iallocator is not None:
10679 # iallocator will select a new node in the same group
10680 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10682 self.needed_locks[locking.LEVEL_NODE_RES] = []
10684 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10685 self.op.iallocator, self.op.remote_node,
10686 self.op.disks, False, self.op.early_release,
10687 self.op.ignore_ipolicy)
10689 self.tasklets = [self.replacer]
10691 def DeclareLocks(self, level):
10692 if level == locking.LEVEL_NODEGROUP:
10693 assert self.op.remote_node is None
10694 assert self.op.iallocator is not None
10695 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10697 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10698 # Lock all groups used by instance optimistically; this requires going
10699 # via the node before it's locked, requiring verification later on
10700 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10701 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10703 elif level == locking.LEVEL_NODE:
10704 if self.op.iallocator is not None:
10705 assert self.op.remote_node is None
10706 assert not self.needed_locks[locking.LEVEL_NODE]
10708 # Lock member nodes of all locked groups
10709         self.needed_locks[locking.LEVEL_NODE] = \
10710           [node_name
10711 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10712 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10714 self._LockInstancesNodes()
10715 elif level == locking.LEVEL_NODE_RES:
10717 self.needed_locks[locking.LEVEL_NODE_RES] = \
10718 self.needed_locks[locking.LEVEL_NODE]
10720 def BuildHooksEnv(self):
10721 """Build hooks env.
10723 This runs on the master, the primary and all the secondaries.
10726 instance = self.replacer.instance
10727     env = {
10728       "MODE": self.op.mode,
10729       "NEW_SECONDARY": self.op.remote_node,
10730       "OLD_SECONDARY": instance.secondary_nodes[0],
10731       }
10732     env.update(_BuildInstanceHookEnvByObject(self, instance))
10733     return env
10735 def BuildHooksNodes(self):
10736 """Build hooks nodes.
10739 instance = self.replacer.instance
10740     nl = [
10741       self.cfg.GetMasterNode(),
10742       instance.primary_node,
10743       ]
10744     if self.op.remote_node is not None:
10745       nl.append(self.op.remote_node)
10746     return nl, nl
10748 def CheckPrereq(self):
10749 """Check prerequisites.
10752 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10753 self.op.iallocator is None)
10755 # Verify if node group locks are still correct
10756 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10757     if owned_groups:
10758       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10760 return LogicalUnit.CheckPrereq(self)
10763 class TLReplaceDisks(Tasklet):
10764 """Replaces disks for an instance.
10766 Note: Locking is not within the scope of this class.
10769 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10770 disks, delay_iallocator, early_release, ignore_ipolicy):
10771 """Initializes this class.
10774 Tasklet.__init__(self, lu)
10776     # Parameters
10777     self.instance_name = instance_name
10778     self.mode = mode
10779     self.iallocator_name = iallocator_name
10780     self.remote_node = remote_node
10781     self.disks = disks
10782     self.delay_iallocator = delay_iallocator
10783 self.early_release = early_release
10784 self.ignore_ipolicy = ignore_ipolicy
10787 self.instance = None
10788 self.new_node = None
10789 self.target_node = None
10790 self.other_node = None
10791 self.remote_node_info = None
10792 self.node_secondary_ip = None
10795 def CheckArguments(mode, remote_node, ialloc):
10796 """Helper function for users of this class.
10799 # check for valid parameter combination
10800 if mode == constants.REPLACE_DISK_CHG:
10801 if remote_node is None and ialloc is None:
10802 raise errors.OpPrereqError("When changing the secondary either an"
10803 " iallocator script must be used or the"
10804 " new node given", errors.ECODE_INVAL)
10806 if remote_node is not None and ialloc is not None:
10807 raise errors.OpPrereqError("Give either the iallocator or the new"
10808 " secondary, not both", errors.ECODE_INVAL)
10810 elif remote_node is not None or ialloc is not None:
10811 # Not replacing the secondary
10812 raise errors.OpPrereqError("The iallocator and new node options can"
10813 " only be used when changing the"
10814 " secondary node", errors.ECODE_INVAL)
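  # A minimal sketch of the combinations accepted above: an iallocator or an
  # explicit new node is required (and they are mutually exclusive) only when
  # the secondary is being changed.
  #
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_PRI, None, None)
  #   # TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_PRI, "node2", None)
  #   #   -> raises errors.OpPrereqError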
10817 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10818 """Compute a new secondary node using an IAllocator.
10821 req = iallocator.IAReqRelocate(name=instance_name,
10822 relocate_from=list(relocate_from))
10823 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10825 ial.Run(iallocator_name)
10827 if not ial.success:
10828 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10829 " %s" % (iallocator_name, ial.info),
10830 errors.ECODE_NORES)
10832 remote_node_name = ial.result[0]
10834 lu.LogInfo("Selected new secondary for instance '%s': %s",
10835 instance_name, remote_node_name)
10837 return remote_node_name
10839 def _FindFaultyDisks(self, node_name):
10840 """Wrapper for L{_FindFaultyInstanceDisks}.
10843     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10844                                     node_name)
10846 def _CheckDisksActivated(self, instance):
10847 """Checks if the instance disks are activated.
10849 @param instance: The instance to check disks
10850 @return: True if they are activated, False otherwise
10853     nodes = instance.all_nodes
10855     for idx, dev in enumerate(instance.disks):
10856       for node in nodes:
10857         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10858         self.cfg.SetDiskID(dev, node)
10860         result = _BlockdevFind(self, node, dev, instance)
10862         if result.offline:
10863           continue
10864         elif result.fail_msg or not result.payload:
10865           return False
10867     return True
10869 def CheckPrereq(self):
10870 """Check prerequisites.
10872 This checks that the instance is in the cluster.
10875 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10876 assert instance is not None, \
10877 "Cannot retrieve locked instance %s" % self.instance_name
10879 if instance.disk_template != constants.DT_DRBD8:
10880 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10881 " instances", errors.ECODE_INVAL)
10883 if len(instance.secondary_nodes) != 1:
10884 raise errors.OpPrereqError("The instance has a strange layout,"
10885 " expected one secondary but found %d" %
10886 len(instance.secondary_nodes),
10887 errors.ECODE_FAULT)
10889 if not self.delay_iallocator:
10890 self._CheckPrereq2()
10892 def _CheckPrereq2(self):
10893 """Check prerequisites, second part.
10895 This function should always be part of CheckPrereq. It was separated and is
10896 now called from Exec because during node evacuation iallocator was only
10897     called with an unmodified cluster model, not taking planned changes into
10898     account.
10901 instance = self.instance
10902 secondary_node = instance.secondary_nodes[0]
10904 if self.iallocator_name is None:
10905 remote_node = self.remote_node
10907 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10908 instance.name, instance.secondary_nodes)
10910 if remote_node is None:
10911 self.remote_node_info = None
10913 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10914 "Remote node '%s' is not locked" % remote_node
10916 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10917 assert self.remote_node_info is not None, \
10918 "Cannot retrieve locked node %s" % remote_node
10920 if remote_node == self.instance.primary_node:
10921 raise errors.OpPrereqError("The specified node is the primary node of"
10922 " the instance", errors.ECODE_INVAL)
10924 if remote_node == secondary_node:
10925 raise errors.OpPrereqError("The specified node is already the"
10926 " secondary node of the instance",
10927 errors.ECODE_INVAL)
10929 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10930 constants.REPLACE_DISK_CHG):
10931 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10932 errors.ECODE_INVAL)
10934 if self.mode == constants.REPLACE_DISK_AUTO:
10935 if not self._CheckDisksActivated(instance):
10936 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10937 " first" % self.instance_name,
10938 errors.ECODE_STATE)
10939 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10940 faulty_secondary = self._FindFaultyDisks(secondary_node)
10942 if faulty_primary and faulty_secondary:
10943 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10944 " one node and can not be repaired"
10945 " automatically" % self.instance_name,
10946 errors.ECODE_STATE)
10948       if faulty_primary:
10949         self.disks = faulty_primary
10950 self.target_node = instance.primary_node
10951 self.other_node = secondary_node
10952 check_nodes = [self.target_node, self.other_node]
10953 elif faulty_secondary:
10954 self.disks = faulty_secondary
10955 self.target_node = secondary_node
10956 self.other_node = instance.primary_node
10957 check_nodes = [self.target_node, self.other_node]
10963 # Non-automatic modes
10964 if self.mode == constants.REPLACE_DISK_PRI:
10965 self.target_node = instance.primary_node
10966 self.other_node = secondary_node
10967 check_nodes = [self.target_node, self.other_node]
10969 elif self.mode == constants.REPLACE_DISK_SEC:
10970 self.target_node = secondary_node
10971 self.other_node = instance.primary_node
10972 check_nodes = [self.target_node, self.other_node]
10974 elif self.mode == constants.REPLACE_DISK_CHG:
10975 self.new_node = remote_node
10976 self.other_node = instance.primary_node
10977 self.target_node = secondary_node
10978 check_nodes = [self.new_node, self.other_node]
10980 _CheckNodeNotDrained(self.lu, remote_node)
10981 _CheckNodeVmCapable(self.lu, remote_node)
10983 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10984 assert old_node_info is not None
10985 if old_node_info.offline and not self.early_release:
10986 # doesn't make sense to delay the release
10987 self.early_release = True
10988 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10989 " early-release mode", secondary_node)
10991     else:
10992       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10993                                    self.mode)
10995 # If not specified all disks should be replaced
10996     if not self.disks:
10997       self.disks = range(len(self.instance.disks))
10999 # TODO: This is ugly, but right now we can't distinguish between internal
11000 # submitted opcode and external one. We should fix that.
11001 if self.remote_node_info:
11002 # We change the node, lets verify it still meets instance policy
11003 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11004 cluster = self.cfg.GetClusterInfo()
11005       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11006                                                               new_group_info)
11007 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11008 ignore=self.ignore_ipolicy)
11010 for node in check_nodes:
11011 _CheckNodeOnline(self.lu, node)
11013     touched_nodes = frozenset(node_name for node_name in [self.new_node,
11014                                                            self.other_node,
11015                                                            self.target_node]
11016 if node_name is not None)
11018 # Release unneeded node and node resource locks
11019 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11020 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11022 # Release any owned node group
11023 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11024 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11026 # Check whether disks are valid
11027 for disk_idx in self.disks:
11028 instance.FindDisk(disk_idx)
11030 # Get secondary node IP addresses
11031 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11032 in self.cfg.GetMultiNodeInfo(touched_nodes))
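# Illustrative note: node_secondary_ip ends up as a plain
# {node_name: secondary_ip} mapping, e.g. {"node1.example.com": "192.0.2.1"}
# (example values only); it is what the DRBD disconnect/attach RPCs in the
# Exec path expect.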
11034 def Exec(self, feedback_fn):
11035 """Execute disk replacement.
11037 This dispatches the disk replacement to the appropriate handler.
11040 if self.delay_iallocator:
11041 self._CheckPrereq2()
11044 # Verify owned locks before starting operation
11045 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11046 assert set(owned_nodes) == set(self.node_secondary_ip), \
11047 ("Incorrect node locks, owning %s, expected %s" %
11048 (owned_nodes, self.node_secondary_ip.keys()))
11049 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11050 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11052 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11053 assert list(owned_instances) == [self.instance_name], \
11054 "Instance '%s' not locked" % self.instance_name
11056 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11057 "Should not own any node group lock at this point"
11059 if not self.disks:
11060 feedback_fn("No disks need replacement")
11061 return
11063 feedback_fn("Replacing disk(s) %s for %s" %
11064 (utils.CommaJoin(self.disks), self.instance.name))
11066 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11068 # Activate the instance disks if we're replacing them on a down instance
11069 if activate_disks:
11070 _StartInstanceDisks(self.lu, self.instance, True)
11073 # Should we replace the secondary node?
11074 if self.new_node is not None:
11075 fn = self._ExecDrbd8Secondary
11076 else:
11077 fn = self._ExecDrbd8DiskOnly
11079 result = fn(feedback_fn)
11081 # Deactivate the instance disks if we're replacing them on a
11082 # down instance
11083 if activate_disks:
11084 _SafeShutdownInstanceDisks(self.lu, self.instance)
11086 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11089 # Verify owned locks
11090 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11091 nodes = frozenset(self.node_secondary_ip)
11092 assert ((self.early_release and not owned_nodes) or
11093 (not self.early_release and not (set(owned_nodes) - nodes))), \
11094 ("Not owning the correct locks, early_release=%s, owned=%r,"
11095 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11099 def _CheckVolumeGroup(self, nodes):
11100 self.lu.LogInfo("Checking volume groups")
11102 vgname = self.cfg.GetVGName()
11104 # Make sure volume group exists on all involved nodes
11105 results = self.rpc.call_vg_list(nodes)
11106 if not results:
11107 raise errors.OpExecError("Can't list volume groups on the nodes")
11109 for node in nodes:
11110 res = results[node]
11111 res.Raise("Error checking node %s" % node)
11112 if vgname not in res.payload:
11113 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11114 (vgname, node))
11116 def _CheckDisksExistence(self, nodes):
11117 # Check disk existence
11118 for idx, dev in enumerate(self.instance.disks):
11119 if idx not in self.disks:
11123 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11124 self.cfg.SetDiskID(dev, node)
11126 result = _BlockdevFind(self, node, dev, self.instance)
11128 msg = result.fail_msg
11129 if msg or not result.payload:
11130 if not msg:
11131 msg = "disk not found"
11132 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11133 (idx, node, msg))
11135 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11136 for idx, dev in enumerate(self.instance.disks):
11137 if idx not in self.disks:
11140 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11143 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11144 on_primary, ldisk=ldisk):
11145 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11146 " replace disks for instance %s" %
11147 (node_name, self.instance.name))
11149 def _CreateNewStorage(self, node_name):
11150 """Create new storage on the primary or secondary node.
11152 This is only used for same-node replaces, not for changing the
11153 secondary node, hence we don't want to modify the existing disk.
11156 iv_names = {}
11158 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11159 for idx, dev in enumerate(disks):
11160 if idx not in self.disks:
11163 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11165 self.cfg.SetDiskID(dev, node_name)
11167 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11168 names = _GenerateUniqueNames(self.lu, lv_names)
11170 (data_disk, meta_disk) = dev.children
11171 vg_data = data_disk.logical_id[0]
11172 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11173 logical_id=(vg_data, names[0]),
11174 params=data_disk.params)
11175 vg_meta = meta_disk.logical_id[0]
11176 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11177 size=constants.DRBD_META_SIZE,
11178 logical_id=(vg_meta, names[1]),
11179 params=meta_disk.params)
11181 new_lvs = [lv_data, lv_meta]
11182 old_lvs = [child.Copy() for child in dev.children]
11183 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11185 # we pass force_create=True to force the LVM creation
11186 for new_lv in new_lvs:
11187 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11188 _GetInstanceInfoText(self.instance), False)
11190 return iv_names
11192 def _CheckDevices(self, node_name, iv_names):
11193 for name, (dev, _, _) in iv_names.iteritems():
11194 self.cfg.SetDiskID(dev, node_name)
11196 result = _BlockdevFind(self, node_name, dev, self.instance)
11198 msg = result.fail_msg
11199 if msg or not result.payload:
11201 msg = "disk not found"
11202 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11205 if result.payload.is_degraded:
11206 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11208 def _RemoveOldStorage(self, node_name, iv_names):
11209 for name, (_, old_lvs, _) in iv_names.iteritems():
11210 self.lu.LogInfo("Remove logical volumes for %s" % name)
11213 self.cfg.SetDiskID(lv, node_name)
11215 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11217 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11218 hint="remove unused LVs manually")
11220 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11221 """Replace a disk on the primary or secondary for DRBD 8.
11223 The algorithm for replace is quite complicated:
11225 1. for each disk to be replaced:
11227 1. create new LVs on the target node with unique names
11228 1. detach old LVs from the drbd device
11229 1. rename old LVs to name_replaced.<time_t>
11230 1. rename new LVs to old LVs
11231 1. attach the new LVs (with the old names now) to the drbd device
11233 1. wait for sync across all devices
11235 1. for each modified disk:
11237 1. remove old LVs (which have the name name_replaced.<time_t>)
11239 Failures are not very well handled.
11242 steps_total = 6
11244 # Step: check device activation
11245 self.lu.LogStep(1, steps_total, "Check device existence")
11246 self._CheckDisksExistence([self.other_node, self.target_node])
11247 self._CheckVolumeGroup([self.target_node, self.other_node])
11249 # Step: check other node consistency
11250 self.lu.LogStep(2, steps_total, "Check peer consistency")
11251 self._CheckDisksConsistency(self.other_node,
11252 self.other_node == self.instance.primary_node,
11253 False)
11255 # Step: create new storage
11256 self.lu.LogStep(3, steps_total, "Allocate new storage")
11257 iv_names = self._CreateNewStorage(self.target_node)
11259 # Step: for each lv, detach+rename*2+attach
11260 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11261 for dev, old_lvs, new_lvs in iv_names.itervalues():
11262 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11264 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11265 old_lvs)
11266 result.Raise("Can't detach drbd from local storage on node"
11267 " %s for device %s" % (self.target_node, dev.iv_name))
11269 #cfg.Update(instance)
11271 # ok, we created the new LVs, so now we know we have the needed
11272 # storage; as such, we proceed on the target node to rename
11273 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11274 # using the assumption that logical_id == physical_id (which in
11275 # turn is the unique_id on that node)
11277 # FIXME(iustin): use a better name for the replaced LVs
11278 temp_suffix = int(time.time())
11279 ren_fn = lambda d, suff: (d.physical_id[0],
11280 d.physical_id[1] + "_replaced-%s" % suff)
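# Illustrative example (hypothetical volume names): with temp_suffix
# 1400000000, an old data LV ("xenvg", "abc.disk0_data") is renamed by
# ren_fn to ("xenvg", "abc.disk0_data_replaced-1400000000"), and the freshly
# created LV is then renamed to the original ("xenvg", "abc.disk0_data"), so
# the DRBD device can re-attach its children under the old names.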
11282 # Build the rename list based on what LVs exist on the node
11283 rename_old_to_new = []
11284 for to_ren in old_lvs:
11285 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11286 if not result.fail_msg and result.payload:
11288 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11290 self.lu.LogInfo("Renaming the old LVs on the target node")
11291 result = self.rpc.call_blockdev_rename(self.target_node,
11292 rename_old_to_new)
11293 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11295 # Now we rename the new LVs to the old LVs
11296 self.lu.LogInfo("Renaming the new LVs on the target node")
11297 rename_new_to_old = [(new, old.physical_id)
11298 for old, new in zip(old_lvs, new_lvs)]
11299 result = self.rpc.call_blockdev_rename(self.target_node,
11300 rename_new_to_old)
11301 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11303 # Intermediate steps of in memory modifications
11304 for old, new in zip(old_lvs, new_lvs):
11305 new.logical_id = old.logical_id
11306 self.cfg.SetDiskID(new, self.target_node)
11308 # We need to modify old_lvs so that removal later removes the
11309 # right LVs, not the newly added ones; note that old_lvs is a
11311 for disk in old_lvs:
11312 disk.logical_id = ren_fn(disk, temp_suffix)
11313 self.cfg.SetDiskID(disk, self.target_node)
11315 # Now that the new lvs have the old name, we can add them to the device
11316 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11317 result = self.rpc.call_blockdev_addchildren(self.target_node,
11318 (dev, self.instance), new_lvs)
11319 msg = result.fail_msg
11320 if msg:
11321 for new_lv in new_lvs:
11322 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11323 new_lv).fail_msg
11324 if msg2:
11325 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11326 hint=("cleanup manually the unused logical"
11327 " volumes"))
11328 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11330 cstep = itertools.count(5)
11332 if self.early_release:
11333 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11334 self._RemoveOldStorage(self.target_node, iv_names)
11335 # TODO: Check if releasing locks early still makes sense
11336 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11338 # Release all resource locks except those used by the instance
11339 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11340 keep=self.node_secondary_ip.keys())
11342 # Release all node locks while waiting for sync
11343 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11345 # TODO: Can the instance lock be downgraded here? Take the optional disk
11346 # shutdown in the caller into consideration.
11349 # This can fail as the old devices are degraded and _WaitForSync
11350 # does a combined result over all disks, so we don't check its return value
11351 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11352 _WaitForSync(self.lu, self.instance)
11354 # Check all devices manually
11355 self._CheckDevices(self.instance.primary_node, iv_names)
11357 # Step: remove old storage
11358 if not self.early_release:
11359 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11360 self._RemoveOldStorage(self.target_node, iv_names)
11362 def _ExecDrbd8Secondary(self, feedback_fn):
11363 """Replace the secondary node for DRBD 8.
11365 The algorithm for replace is quite complicated:
11366 - for all disks of the instance:
11367 - create new LVs on the new node with same names
11368 - shutdown the drbd device on the old secondary
11369 - disconnect the drbd network on the primary
11370 - create the drbd device on the new secondary
11371 - network attach the drbd on the primary, using an artifice:
11372 the drbd code for Attach() will connect to the network if it
11373 finds a device which is connected to the good local disks but
11374 not network enabled
11375 - wait for sync across all devices
11376 - remove all disks from the old secondary
11378 Failures are not very well handled.
11381 steps_total = 6
11383 pnode = self.instance.primary_node
11385 # Step: check device activation
11386 self.lu.LogStep(1, steps_total, "Check device existence")
11387 self._CheckDisksExistence([self.instance.primary_node])
11388 self._CheckVolumeGroup([self.instance.primary_node])
11390 # Step: check other node consistency
11391 self.lu.LogStep(2, steps_total, "Check peer consistency")
11392 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11394 # Step: create new storage
11395 self.lu.LogStep(3, steps_total, "Allocate new storage")
11396 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11397 for idx, dev in enumerate(disks):
11398 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11399 (self.new_node, idx))
11400 # we pass force_create=True to force LVM creation
11401 for new_lv in dev.children:
11402 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11403 True, _GetInstanceInfoText(self.instance), False)
11405 # Step 4: drbd minors and drbd setup changes
11406 # after this, we must manually remove the drbd minors on both the
11407 # error and the success paths
11408 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11409 minors = self.cfg.AllocateDRBDMinor([self.new_node
11410 for dev in self.instance.disks],
11411 self.instance.name)
11412 logging.debug("Allocated minors %r", minors)
11414 iv_names = {}
11415 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11416 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11417 (self.new_node, idx))
11418 # create new devices on new_node; note that we create two IDs:
11419 # one without port, so the drbd will be activated without
11420 # networking information on the new node at this stage, and one
11421 # with network, for the latter activation in step 4
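# Illustrative sketch (made-up values): if dev.logical_id is
# ("node1", "node2", 11000, 0, 1, "secret") and new_minor is 3, then
# new_alone_id becomes ("node1", "new_node", None, 0, 3, "secret") (no port,
# so the new DRBD device comes up standalone), while new_net_id keeps port
# 11000 for the later network attach.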
11422 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11423 if self.instance.primary_node == o_node1:
11424 p_minor = o_minor1
11425 else:
11426 assert self.instance.primary_node == o_node2, "Three-node instance?"
11427 p_minor = o_minor2
11429 new_alone_id = (self.instance.primary_node, self.new_node, None,
11430 p_minor, new_minor, o_secret)
11431 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11432 p_minor, new_minor, o_secret)
11434 iv_names[idx] = (dev, dev.children, new_net_id)
11435 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11436 new_net_id)
11437 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11438 logical_id=new_alone_id,
11439 children=dev.children,
11440 size=dev.size,
11441 params={})
11442 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11443 self.cfg)
11444 try:
11445 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11446 anno_new_drbd,
11447 _GetInstanceInfoText(self.instance), False)
11448 except errors.GenericError:
11449 self.cfg.ReleaseDRBDMinors(self.instance.name)
11450 raise
11452 # We have new devices, shutdown the drbd on the old secondary
11453 for idx, dev in enumerate(self.instance.disks):
11454 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11455 self.cfg.SetDiskID(dev, self.target_node)
11456 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11457 (dev, self.instance)).fail_msg
11459 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11460 " node: %s" % (idx, msg),
11461 hint=("Please cleanup this device manually as"
11462 " soon as possible"))
11464 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11465 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11466 self.instance.disks)[pnode]
11468 msg = result.fail_msg
11469 if msg:
11470 # detaches didn't succeed (unlikely)
11471 self.cfg.ReleaseDRBDMinors(self.instance.name)
11472 raise errors.OpExecError("Can't detach the disks from the network on"
11473 " old node: %s" % (msg,))
11475 # if we managed to detach at least one, we update all the disks of
11476 # the instance to point to the new secondary
11477 self.lu.LogInfo("Updating instance configuration")
11478 for dev, _, new_logical_id in iv_names.itervalues():
11479 dev.logical_id = new_logical_id
11480 self.cfg.SetDiskID(dev, self.instance.primary_node)
11482 self.cfg.Update(self.instance, feedback_fn)
11484 # Release all node locks (the configuration has been updated)
11485 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11487 # and now perform the drbd attach
11488 self.lu.LogInfo("Attaching primary drbds to new secondary"
11489 " (standalone => connected)")
11490 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11491 self.new_node],
11492 self.node_secondary_ip,
11493 (self.instance.disks, self.instance),
11494 self.instance.name,
11495 False)
11496 for to_node, to_result in result.items():
11497 msg = to_result.fail_msg
11498 if msg:
11499 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11500 to_node, msg,
11501 hint=("please do a gnt-instance info to see the"
11502 " status of disks"))
11504 cstep = itertools.count(5)
11506 if self.early_release:
11507 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11508 self._RemoveOldStorage(self.target_node, iv_names)
11509 # TODO: Check if releasing locks early still makes sense
11510 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11512 # Release all resource locks except those used by the instance
11513 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11514 keep=self.node_secondary_ip.keys())
11516 # TODO: Can the instance lock be downgraded here? Take the optional disk
11517 # shutdown in the caller into consideration.
11520 # This can fail as the old devices are degraded and _WaitForSync
11521 # does a combined result over all disks, so we don't check its return value
11522 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11523 _WaitForSync(self.lu, self.instance)
11525 # Check all devices manually
11526 self._CheckDevices(self.instance.primary_node, iv_names)
11528 # Step: remove old storage
11529 if not self.early_release:
11530 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11531 self._RemoveOldStorage(self.target_node, iv_names)
11534 class LURepairNodeStorage(NoHooksLU):
11535 """Repairs the volume group on a node.
11540 def CheckArguments(self):
11541 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11543 storage_type = self.op.storage_type
11545 if (constants.SO_FIX_CONSISTENCY not in
11546 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11547 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11548 " repaired" % storage_type,
11549 errors.ECODE_INVAL)
11551 def ExpandNames(self):
11552 self.needed_locks = {
11553 locking.LEVEL_NODE: [self.op.node_name],
11556 def _CheckFaultyDisks(self, instance, node_name):
11557 """Ensure faulty disks abort the opcode or at least warn."""
11559 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11561 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11562 " node '%s'" % (instance.name, node_name),
11563 errors.ECODE_STATE)
11564 except errors.OpPrereqError, err:
11565 if self.op.ignore_consistency:
11566 self.proc.LogWarning(str(err.args[0]))
11567 else:
11568 raise
11570 def CheckPrereq(self):
11571 """Check prerequisites.
11574 # Check whether any instance on this node has faulty disks
11575 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11576 if inst.admin_state != constants.ADMINST_UP:
11577 continue
11578 check_nodes = set(inst.all_nodes)
11579 check_nodes.discard(self.op.node_name)
11580 for inst_node_name in check_nodes:
11581 self._CheckFaultyDisks(inst, inst_node_name)
11583 def Exec(self, feedback_fn):
11584 feedback_fn("Repairing storage unit '%s' on %s ..." %
11585 (self.op.name, self.op.node_name))
11587 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11588 result = self.rpc.call_storage_execute(self.op.node_name,
11589 self.op.storage_type, st_args,
11590 self.op.name,
11591 constants.SO_FIX_CONSISTENCY)
11592 result.Raise("Failed to repair storage unit '%s' on %s" %
11593 (self.op.name, self.op.node_name))
11596 class LUNodeEvacuate(NoHooksLU):
11597 """Evacuates instances off a list of nodes.
11602 _MODE2IALLOCATOR = {
11603 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11604 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11605 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11607 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11608 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11609 constants.IALLOCATOR_NEVAC_MODES)
11611 def CheckArguments(self):
11612 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11614 def ExpandNames(self):
11615 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11617 if self.op.remote_node is not None:
11618 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11619 assert self.op.remote_node
11621 if self.op.remote_node == self.op.node_name:
11622 raise errors.OpPrereqError("Can not use evacuated node as a new"
11623 " secondary node", errors.ECODE_INVAL)
11625 if self.op.mode != constants.NODE_EVAC_SEC:
11626 raise errors.OpPrereqError("Without the use of an iallocator only"
11627 " secondary instances can be evacuated",
11628 errors.ECODE_INVAL)
11631 self.share_locks = _ShareAll()
11632 self.needed_locks = {
11633 locking.LEVEL_INSTANCE: [],
11634 locking.LEVEL_NODEGROUP: [],
11635 locking.LEVEL_NODE: [],
11638 # Determine nodes (via group) optimistically, needs verification once locks
11639 # have been acquired
11640 self.lock_nodes = self._DetermineNodes()
11642 def _DetermineNodes(self):
11643 """Gets the list of nodes to operate on.
11646 if self.op.remote_node is None:
11647 # Iallocator will choose any node(s) in the same group
11648 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11649 else:
11650 group_nodes = frozenset([self.op.remote_node])
11652 # Determine nodes to be locked
11653 return set([self.op.node_name]) | group_nodes
11655 def _DetermineInstances(self):
11656 """Builds list of instances to operate on.
11659 assert self.op.mode in constants.NODE_EVAC_MODES
11661 if self.op.mode == constants.NODE_EVAC_PRI:
11662 # Primary instances only
11663 inst_fn = _GetNodePrimaryInstances
11664 assert self.op.remote_node is None, \
11665 "Evacuating primary instances requires iallocator"
11666 elif self.op.mode == constants.NODE_EVAC_SEC:
11667 # Secondary instances only
11668 inst_fn = _GetNodeSecondaryInstances
11670 else:
11671 assert self.op.mode == constants.NODE_EVAC_ALL
11672 inst_fn = _GetNodeInstances
11673 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11675 raise errors.OpPrereqError("Due to an issue with the iallocator"
11676 " interface it is not possible to evacuate"
11677 " all instances at once; specify explicitly"
11678 " whether to evacuate primary or secondary"
11679 " instances",
11680 errors.ECODE_INVAL)
11682 return inst_fn(self.cfg, self.op.node_name)
11684 def DeclareLocks(self, level):
11685 if level == locking.LEVEL_INSTANCE:
11686 # Lock instances optimistically, needs verification once node and group
11687 # locks have been acquired
11688 self.needed_locks[locking.LEVEL_INSTANCE] = \
11689 set(i.name for i in self._DetermineInstances())
11691 elif level == locking.LEVEL_NODEGROUP:
11692 # Lock node groups for all potential target nodes optimistically, needs
11693 # verification once nodes have been acquired
11694 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11695 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11697 elif level == locking.LEVEL_NODE:
11698 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11700 def CheckPrereq(self):
11702 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11703 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11704 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11706 need_nodes = self._DetermineNodes()
11708 if not owned_nodes.issuperset(need_nodes):
11709 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11710 " locks were acquired, current nodes are"
11711 " '%s', used to be '%s'; retry the"
11712 " operation" %
11713 (self.op.node_name,
11714 utils.CommaJoin(need_nodes),
11715 utils.CommaJoin(owned_nodes)),
11716 errors.ECODE_STATE)
11718 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11719 if owned_groups != wanted_groups:
11720 raise errors.OpExecError("Node groups changed since locks were acquired,"
11721 " current groups are '%s', used to be '%s';"
11722 " retry the operation" %
11723 (utils.CommaJoin(wanted_groups),
11724 utils.CommaJoin(owned_groups)))
11726 # Determine affected instances
11727 self.instances = self._DetermineInstances()
11728 self.instance_names = [i.name for i in self.instances]
11730 if set(self.instance_names) != owned_instances:
11731 raise errors.OpExecError("Instances on node '%s' changed since locks"
11732 " were acquired, current instances are '%s',"
11733 " used to be '%s'; retry the operation" %
11734 (self.op.node_name,
11735 utils.CommaJoin(self.instance_names),
11736 utils.CommaJoin(owned_instances)))
11738 if self.instance_names:
11739 self.LogInfo("Evacuating instances from node '%s': %s",
11740 self.op.node_name,
11741 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11742 else:
11743 self.LogInfo("No instances to evacuate from node '%s'",
11744 self.op.node_name)
11746 if self.op.remote_node is not None:
11747 for i in self.instances:
11748 if i.primary_node == self.op.remote_node:
11749 raise errors.OpPrereqError("Node %s is the primary node of"
11750 " instance %s, cannot use it as"
11751 " secondary node" %
11752 (self.op.remote_node, i.name),
11753 errors.ECODE_INVAL)
11755 def Exec(self, feedback_fn):
11756 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11758 if not self.instance_names:
11759 # No instances to evacuate
11760 jobs = []
11762 elif self.op.iallocator is not None:
11763 # TODO: Implement relocation to other group
11764 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11765 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11766 instances=list(self.instance_names))
11767 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11769 ial.Run(self.op.iallocator)
11771 if not ial.success:
11772 raise errors.OpPrereqError("Can't compute node evacuation using"
11773 " iallocator '%s': %s" %
11774 (self.op.iallocator, ial.info),
11775 errors.ECODE_NORES)
11777 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11779 elif self.op.remote_node is not None:
11780 assert self.op.mode == constants.NODE_EVAC_SEC
11781 jobs = [
11782 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11783 remote_node=self.op.remote_node,
11784 disks=[],
11785 mode=constants.REPLACE_DISK_CHG,
11786 early_release=self.op.early_release)]
11787 for instance_name in self.instance_names
11788 ]
11790 else:
11791 raise errors.ProgrammerError("No iallocator or remote node")
11793 return ResultWithJobs(jobs)
11796 def _SetOpEarlyRelease(early_release, op):
11797 """Sets C{early_release} flag on opcodes if available.
11800 try:
11801 op.early_release = early_release
11802 except AttributeError:
11803 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11805 return op
11808 def _NodeEvacDest(use_nodes, group, nodes):
11809 """Returns group or nodes depending on caller's choice.
11812 if use_nodes:
11813 return utils.CommaJoin(nodes)
11814 else:
11815 return group
11818 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11819 """Unpacks the result of change-group and node-evacuate iallocator requests.
11821 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11822 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11824 @type lu: L{LogicalUnit}
11825 @param lu: Logical unit instance
11826 @type alloc_result: tuple/list
11827 @param alloc_result: Result from iallocator
11828 @type early_release: bool
11829 @param early_release: Whether to release locks early if possible
11830 @type use_nodes: bool
11831 @param use_nodes: Whether to display node names instead of groups
11834 (moved, failed, jobs) = alloc_result
11836 if failed:
11837 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11838 for (name, reason) in failed)
11839 lu.LogWarning("Unable to evacuate instances %s", failreason)
11840 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11842 if moved:
11843 lu.LogInfo("Instances to be moved: %s",
11844 utils.CommaJoin("%s (to %s)" %
11845 (name, _NodeEvacDest(use_nodes, group, nodes))
11846 for (name, group, nodes) in moved))
11848 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11849 map(opcodes.OpCode.LoadOpCode, ops))
11850 for ops in jobs]
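# Illustrative note (made-up data): alloc_result is the (moved, failed, jobs)
# triple, e.g. moved = [("inst1", "group1", ["node3"])], failed = [] and
# jobs = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]]; each serialized
# opcode in "jobs" is turned back into an OpCode object by LoadOpCode above.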
11853 def _DiskSizeInBytesToMebibytes(lu, size):
11854 """Converts a disk size in bytes to mebibytes.
11856 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11859 (mib, remainder) = divmod(size, 1024 * 1024)
11861 if remainder != 0:
11862 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11863 " to not overwrite existing data (%s bytes will not be"
11864 " wiped)", (1024 * 1024) - remainder)
11865 mib += 1
11867 return mib
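# Worked example (illustrative): _DiskSizeInBytesToMebibytes(lu, 1073741825),
# i.e. 1 GiB plus one byte, leaves a remainder of 1 after divmod by 1048576,
# so it warns and returns 1025 MiB instead of truncating to 1024.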
11870 class LUInstanceGrowDisk(LogicalUnit):
11871 """Grow a disk of an instance.
11874 HPATH = "disk-grow"
11875 HTYPE = constants.HTYPE_INSTANCE
11878 def ExpandNames(self):
11879 self._ExpandAndLockInstance()
11880 self.needed_locks[locking.LEVEL_NODE] = []
11881 self.needed_locks[locking.LEVEL_NODE_RES] = []
11882 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11883 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11885 def DeclareLocks(self, level):
11886 if level == locking.LEVEL_NODE:
11887 self._LockInstancesNodes()
11888 elif level == locking.LEVEL_NODE_RES:
11890 self.needed_locks[locking.LEVEL_NODE_RES] = \
11891 self.needed_locks[locking.LEVEL_NODE][:]
11893 def BuildHooksEnv(self):
11894 """Build hooks env.
11896 This runs on the master, the primary and all the secondaries.
11899 env = {
11900 "DISK": self.op.disk,
11901 "AMOUNT": self.op.amount,
11902 "ABSOLUTE": self.op.absolute,
11903 }
11904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11905 return env
11907 def BuildHooksNodes(self):
11908 """Build hooks nodes.
11911 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11912 return (nl, nl)
11914 def CheckPrereq(self):
11915 """Check prerequisites.
11917 This checks that the instance is in the cluster.
11920 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11921 assert instance is not None, \
11922 "Cannot retrieve locked instance %s" % self.op.instance_name
11923 nodenames = list(instance.all_nodes)
11924 for node in nodenames:
11925 _CheckNodeOnline(self, node)
11927 self.instance = instance
11929 if instance.disk_template not in constants.DTS_GROWABLE:
11930 raise errors.OpPrereqError("Instance's disk layout does not support"
11931 " growing", errors.ECODE_INVAL)
11933 self.disk = instance.FindDisk(self.op.disk)
11935 if self.op.absolute:
11936 self.target = self.op.amount
11937 self.delta = self.target - self.disk.size
11938 if self.delta < 0:
11939 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11940 "current disk size (%s)" %
11941 (utils.FormatUnit(self.target, "h"),
11942 utils.FormatUnit(self.disk.size, "h")),
11943 errors.ECODE_STATE)
11944 else:
11945 self.delta = self.op.amount
11946 self.target = self.disk.size + self.delta
11947 if self.delta < 0:
11948 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11949 utils.FormatUnit(self.delta, "h"),
11950 errors.ECODE_INVAL)
11952 if instance.disk_template not in (constants.DT_FILE,
11953 constants.DT_SHARED_FILE,
11955 # TODO: check the free disk space for file, when that feature will be
11957 _CheckNodesFreeDiskPerVG(self, nodenames,
11958 self.disk.ComputeGrowth(self.delta))
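# Illustrative example (made-up numbers): for a 10240 MiB disk, an opcode with
# absolute=True and amount=15360 yields target=15360 and delta=5120, while
# absolute=False with amount=5120 yields the same target; in both cases a
# negative delta is rejected above.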
11960 def Exec(self, feedback_fn):
11961 """Execute disk grow.
11964 instance = self.instance
11965 disk = self.disk
11967 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11968 assert (self.owned_locks(locking.LEVEL_NODE) ==
11969 self.owned_locks(locking.LEVEL_NODE_RES))
11971 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
11973 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11974 if not disks_ok:
11975 raise errors.OpExecError("Cannot activate block device to grow")
11977 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11978 (self.op.disk, instance.name,
11979 utils.FormatUnit(self.delta, "h"),
11980 utils.FormatUnit(self.target, "h")))
11982 # First run all grow ops in dry-run mode
11983 for node in instance.all_nodes:
11984 self.cfg.SetDiskID(disk, node)
11985 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11987 result.Raise("Dry-run grow request failed to node %s" % node)
11989 if wipe_disks:
11990 # Get disk size from primary node for wiping
11991 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
11992 result.Raise("Failed to retrieve disk size from node '%s'" %
11993 instance.primary_node)
11995 (disk_size_in_bytes, ) = result.payload
11997 if disk_size_in_bytes is None:
11998 raise errors.OpExecError("Failed to retrieve disk size from primary"
11999 " node '%s'" % instance.primary_node)
12001 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12003 assert old_disk_size >= disk.size, \
12004 ("Retrieved disk size too small (got %s, should be at least %s)" %
12005 (old_disk_size, disk.size))
12006 else:
12007 old_disk_size = None
12009 # We know that (as far as we can test) operations across different
12010 # nodes will succeed, time to run it for real on the backing storage
12011 for node in instance.all_nodes:
12012 self.cfg.SetDiskID(disk, node)
12013 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12015 result.Raise("Grow request failed to node %s" % node)
12017 # And now execute it for logical storage, on the primary node
12018 node = instance.primary_node
12019 self.cfg.SetDiskID(disk, node)
12020 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12022 result.Raise("Grow request failed to node %s" % node)
12024 disk.RecordGrow(self.delta)
12025 self.cfg.Update(instance, feedback_fn)
12027 # Changes have been recorded, release node lock
12028 _ReleaseLocks(self, locking.LEVEL_NODE)
12030 # Downgrade lock while waiting for sync
12031 self.glm.downgrade(locking.LEVEL_INSTANCE)
12033 assert wipe_disks ^ (old_disk_size is None)
12036 assert instance.disks[self.op.disk] == disk
12038 # Wipe newly added disk space
12039 _WipeDisks(self, instance,
12040 disks=[(self.op.disk, disk, old_disk_size)])
12042 if self.op.wait_for_sync:
12043 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12045 self.proc.LogWarning("Disk sync-ing has not returned a good"
12046 " status; please check the instance")
12047 if instance.admin_state != constants.ADMINST_UP:
12048 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12049 elif instance.admin_state != constants.ADMINST_UP:
12050 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12051 " not supposed to be running because no wait for"
12052 " sync mode was requested")
12054 assert self.owned_locks(locking.LEVEL_NODE_RES)
12055 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12058 class LUInstanceQueryData(NoHooksLU):
12059 """Query runtime instance data.
12064 def ExpandNames(self):
12065 self.needed_locks = {}
12067 # Use locking if requested or when non-static information is wanted
12068 if not (self.op.static or self.op.use_locking):
12069 self.LogWarning("Non-static data requested, locks need to be acquired")
12070 self.op.use_locking = True
12072 if self.op.instances or not self.op.use_locking:
12073 # Expand instance names right here
12074 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12076 # Will use acquired locks
12077 self.wanted_names = None
12079 if self.op.use_locking:
12080 self.share_locks = _ShareAll()
12082 if self.wanted_names is None:
12083 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12085 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12087 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12088 self.needed_locks[locking.LEVEL_NODE] = []
12089 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12091 def DeclareLocks(self, level):
12092 if self.op.use_locking:
12093 if level == locking.LEVEL_NODEGROUP:
12094 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12096 # Lock all groups used by instances optimistically; this requires going
12097 # via the node before it's locked, requiring verification later on
12098 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12099 frozenset(group_uuid
12100 for instance_name in owned_instances
12102 self.cfg.GetInstanceNodeGroups(instance_name))
12104 elif level == locking.LEVEL_NODE:
12105 self._LockInstancesNodes()
12107 def CheckPrereq(self):
12108 """Check prerequisites.
12110 This only checks the optional instance list against the existing names.
12113 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12114 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12115 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12117 if self.wanted_names is None:
12118 assert self.op.use_locking, "Locking was not used"
12119 self.wanted_names = owned_instances
12121 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12123 if self.op.use_locking:
12124 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12127 assert not (owned_instances or owned_groups or owned_nodes)
12129 self.wanted_instances = instances.values()
12131 def _ComputeBlockdevStatus(self, node, instance, dev):
12132 """Returns the status of a block device
12135 if self.op.static or not node:
12138 self.cfg.SetDiskID(dev, node)
12140 result = self.rpc.call_blockdev_find(node, dev)
12144 result.Raise("Can't compute disk status for %s" % instance.name)
12146 status = result.payload
12150 return (status.dev_path, status.major, status.minor,
12151 status.sync_percent, status.estimated_time,
12152 status.is_degraded, status.ldisk_status)
12154 def _ComputeDiskStatus(self, instance, snode, dev):
12155 """Compute block device status.
12158 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12160 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12162 def _ComputeDiskStatusInner(self, instance, snode, dev):
12163 """Compute block device status.
12165 @attention: The device has to be annotated already.
12168 if dev.dev_type in constants.LDS_DRBD:
12169 # we change the snode then (otherwise we use the one passed in)
12170 if dev.logical_id[0] == instance.primary_node:
12171 snode = dev.logical_id[1]
12173 snode = dev.logical_id[0]
12175 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12177 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12180 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12187 "iv_name": dev.iv_name,
12188 "dev_type": dev.dev_type,
12189 "logical_id": dev.logical_id,
12190 "physical_id": dev.physical_id,
12191 "pstatus": dev_pstatus,
12192 "sstatus": dev_sstatus,
12193 "children": dev_children,
12198 def Exec(self, feedback_fn):
12199 """Gather and return data"""
12202 cluster = self.cfg.GetClusterInfo()
12204 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12205 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12207 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12208 for node in nodes.values()))
12210 group2name_fn = lambda uuid: groups[uuid].name
12212 for instance in self.wanted_instances:
12213 pnode = nodes[instance.primary_node]
12215 if self.op.static or pnode.offline:
12216 remote_state = None
12218 self.LogWarning("Primary node %s is marked offline, returning static"
12219 " information only for instance %s" %
12220 (pnode.name, instance.name))
12222 remote_info = self.rpc.call_instance_info(instance.primary_node,
12224 instance.hypervisor)
12225 remote_info.Raise("Error checking node %s" % instance.primary_node)
12226 remote_info = remote_info.payload
12227 if remote_info and "state" in remote_info:
12228 remote_state = "up"
12229 else:
12230 if instance.admin_state == constants.ADMINST_UP:
12231 remote_state = "down"
12232 else:
12233 remote_state = instance.admin_state
12235 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12236 instance.disks)
12238 snodes_group_uuids = [nodes[snode_name].group
12239 for snode_name in instance.secondary_nodes]
12241 result[instance.name] = {
12242 "name": instance.name,
12243 "config_state": instance.admin_state,
12244 "run_state": remote_state,
12245 "pnode": instance.primary_node,
12246 "pnode_group_uuid": pnode.group,
12247 "pnode_group_name": group2name_fn(pnode.group),
12248 "snodes": instance.secondary_nodes,
12249 "snodes_group_uuids": snodes_group_uuids,
12250 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12252 # this happens to be the same format used for hooks
12253 "nics": _NICListToTuple(self, instance.nics),
12254 "disk_template": instance.disk_template,
12256 "hypervisor": instance.hypervisor,
12257 "network_port": instance.network_port,
12258 "hv_instance": instance.hvparams,
12259 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12260 "be_instance": instance.beparams,
12261 "be_actual": cluster.FillBE(instance),
12262 "os_instance": instance.osparams,
12263 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12264 "serial_no": instance.serial_no,
12265 "mtime": instance.mtime,
12266 "ctime": instance.ctime,
12267 "uuid": instance.uuid,
12268 }
12270 return result
12273 def PrepareContainerMods(mods, private_fn):
12274 """Prepares a list of container modifications by adding a private data field.
12276 @type mods: list of tuples; (operation, index, parameters)
12277 @param mods: List of modifications
12278 @type private_fn: callable or None
12279 @param private_fn: Callable for constructing a private data field for a
12284 if private_fn is None:
12285 fn = lambda: None
12286 else:
12287 fn = private_fn
12289 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12292 #: Type description for changes as returned by L{ApplyContainerMods}'s
12294 _TApplyContModsCbChanges = \
12295 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12296 ht.TNonEmptyString,
12297 ht.TAny,
12298 ])))
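# Illustrative example: a callback change list such as
# [("disk/0", "add"), ("size", 1024)] satisfies this type, i.e. a list of
# two-item tuples whose first element is a non-empty string.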
12301 def ApplyContainerMods(kind, container, chgdesc, mods,
12302 create_fn, modify_fn, remove_fn):
12303 """Applies descriptions in C{mods} to C{container}.
12306 @param kind: One-word item description
12307 @type container: list
12308 @param container: Container to modify
12309 @type chgdesc: None or list
12310 @param chgdesc: List of applied changes
12312 @param mods: Modifications as returned by L{PrepareContainerMods}
12313 @type create_fn: callable
12314 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12315 receives absolute item index, parameters and private data object as added
12316 by L{PrepareContainerMods}, returns tuple containing new item and changes
12318 @type modify_fn: callable
12319 @param modify_fn: Callback for modifying an existing item
12320 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12321 and private data object as added by L{PrepareContainerMods}, returns
12323 @type remove_fn: callable
12324 @param remove_fn: Callback on removing item; receives absolute item index,
12325 item and private data object as added by L{PrepareContainerMods}
12328 for (op, idx, params, private) in mods:
12331 absidx = len(container) - 1
12333 raise IndexError("Not accepting negative indices other than -1")
12334 elif idx > len(container):
12335 raise IndexError("Got %s index %s, but there are only %s" %
12336 (kind, idx, len(container)))
12342 if op == constants.DDM_ADD:
12343 # Calculate where item will be added
12345 addidx = len(container)
12349 if create_fn is None:
12352 (item, changes) = create_fn(addidx, params, private)
12355 container.append(item)
12358 assert idx <= len(container)
12359 # list.insert does so before the specified index
12360 container.insert(idx, item)
12362 # Retrieve existing item
12364 item = container[absidx]
12366 raise IndexError("Invalid %s index %s" % (kind, idx))
12368 if op == constants.DDM_REMOVE:
12371 if remove_fn is not None:
12372 remove_fn(absidx, item, private)
12374 changes = [("%s/%s" % (kind, absidx), "remove")]
12376 assert container[absidx] == item
12377 del container[absidx]
12378 elif op == constants.DDM_MODIFY:
12379 if modify_fn is not None:
12380 changes = modify_fn(absidx, item, params, private)
12381 else:
12382 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12384 assert _TApplyContModsCbChanges(changes)
12386 if not (chgdesc is None or changes is None):
12387 chgdesc.extend(changes)
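# Illustrative usage (hypothetical callback): with container = ["a", "b"] and
# mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 128})], None),
# calling ApplyContainerMods("disk", container, None, mods,
# lambda idx, params, private: (params, []), None, None) appends the new item,
# since index -1 means "add at the end of the container".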
12390 def _UpdateIvNames(base_index, disks):
12391 """Updates the C{iv_name} attribute of disks.
12393 @type disks: list of L{objects.Disk}
12396 for (idx, disk) in enumerate(disks):
12397 disk.iv_name = "disk/%s" % (base_index + idx, )
12400 class _InstNicModPrivate:
12401 """Data structure for network interface modifications.
12403 Used by L{LUInstanceSetParams}.
12406 def __init__(self):
12407 self.params = None
12408 self.filled = None
12411 class LUInstanceSetParams(LogicalUnit):
12412 """Modifies an instance's parameters.
12415 HPATH = "instance-modify"
12416 HTYPE = constants.HTYPE_INSTANCE
12420 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12421 assert ht.TList(mods)
12422 assert not mods or len(mods[0]) in (2, 3)
12424 if mods and len(mods[0]) == 2:
12428 for op, params in mods:
12429 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12430 result.append((op, -1, params))
12434 raise errors.OpPrereqError("Only one %s add or remove operation is"
12435 " supported at a time" % kind,
12436 errors.ECODE_INVAL)
12438 result.append((constants.DDM_MODIFY, op, params))
12440 assert verify_fn(result)
12447 def _CheckMods(kind, mods, key_types, item_fn):
12448 """Ensures requested disk/NIC modifications are valid.
12451 for (op, _, params) in mods:
12452 assert ht.TDict(params)
12454 utils.ForceDictType(params, key_types)
12456 if op == constants.DDM_REMOVE:
12458 raise errors.OpPrereqError("No settings should be passed when"
12459 " removing a %s" % kind,
12460 errors.ECODE_INVAL)
12461 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12462 item_fn(op, params)
12464 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12467 def _VerifyDiskModification(op, params):
12468 """Verifies a disk modification.
12471 if op == constants.DDM_ADD:
12472 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12473 if mode not in constants.DISK_ACCESS_SET:
12474 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12475 errors.ECODE_INVAL)
12477 size = params.get(constants.IDISK_SIZE, None)
12479 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12480 constants.IDISK_SIZE, errors.ECODE_INVAL)
12484 except (TypeError, ValueError), err:
12485 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12486 errors.ECODE_INVAL)
12488 params[constants.IDISK_SIZE] = size
12490 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12491 raise errors.OpPrereqError("Disk size change not possible, use"
12492 " grow-disk", errors.ECODE_INVAL)
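# Illustrative example: after verification a valid "add" modification looks
# like (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024,
# constants.IDISK_MODE: constants.DISK_RDWR}); the size is mandatory and the
# access mode defaults to read-write.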
12495 def _VerifyNicModification(op, params):
12496 """Verifies a network interface modification.
12499 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12500 ip = params.get(constants.INIC_IP, None)
12503 elif ip.lower() == constants.VALUE_NONE:
12504 params[constants.INIC_IP] = None
12505 elif not netutils.IPAddress.IsValid(ip):
12506 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12507 errors.ECODE_INVAL)
12509 bridge = params.get("bridge", None)
12510 link = params.get(constants.INIC_LINK, None)
12511 if bridge and link:
12512 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12513 " at the same time", errors.ECODE_INVAL)
12514 elif bridge and bridge.lower() == constants.VALUE_NONE:
12515 params["bridge"] = None
12516 elif link and link.lower() == constants.VALUE_NONE:
12517 params[constants.INIC_LINK] = None
12519 if op == constants.DDM_ADD:
12520 macaddr = params.get(constants.INIC_MAC, None)
12521 if macaddr is None:
12522 params[constants.INIC_MAC] = constants.VALUE_AUTO
12524 if constants.INIC_MAC in params:
12525 macaddr = params[constants.INIC_MAC]
12526 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12527 macaddr = utils.NormalizeAndValidateMac(macaddr)
12529 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12530 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12531 " modifying an existing NIC",
12532 errors.ECODE_INVAL)
12534 def CheckArguments(self):
12535 if not (self.op.nics or self.op.disks or self.op.disk_template or
12536 self.op.hvparams or self.op.beparams or self.op.os_name or
12537 self.op.offline is not None or self.op.runtime_mem):
12538 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12540 if self.op.hvparams:
12541 _CheckGlobalHvParams(self.op.hvparams)
12543 self.op.disks = self._UpgradeDiskNicMods(
12544 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12545 self.op.nics = self._UpgradeDiskNicMods(
12546 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12548 # Check disk modifications
12549 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12550 self._VerifyDiskModification)
12552 if self.op.disks and self.op.disk_template is not None:
12553 raise errors.OpPrereqError("Disk template conversion and other disk"
12554 " changes not supported at the same time",
12555 errors.ECODE_INVAL)
12557 if (self.op.disk_template and
12558 self.op.disk_template in constants.DTS_INT_MIRROR and
12559 self.op.remote_node is None):
12560 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12561 " one requires specifying a secondary node",
12562 errors.ECODE_INVAL)
12564 # Check NIC modifications
12565 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12566 self._VerifyNicModification)
12568 def ExpandNames(self):
12569 self._ExpandAndLockInstance()
12570 # Can't even acquire node locks in shared mode as upcoming changes in
12571 # Ganeti 2.6 will start to modify the node object on disk conversion
12572 self.needed_locks[locking.LEVEL_NODE] = []
12573 self.needed_locks[locking.LEVEL_NODE_RES] = []
12574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12576 def DeclareLocks(self, level):
12577 # TODO: Acquire group lock in shared mode (disk parameters)
12578 if level == locking.LEVEL_NODE:
12579 self._LockInstancesNodes()
12580 if self.op.disk_template and self.op.remote_node:
12581 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12582 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12583 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12585 self.needed_locks[locking.LEVEL_NODE_RES] = \
12586 self.needed_locks[locking.LEVEL_NODE][:]
12588 def BuildHooksEnv(self):
12589 """Build hooks env.
12591 This runs on the master, primary and secondaries.
12594 args = {}
12595 if constants.BE_MINMEM in self.be_new:
12596 args["minmem"] = self.be_new[constants.BE_MINMEM]
12597 if constants.BE_MAXMEM in self.be_new:
12598 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12599 if constants.BE_VCPUS in self.be_new:
12600 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12601 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12602 # information at all.
12604 if self._new_nics is not None:
12605 nics = []
12607 for nic in self._new_nics:
12608 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12609 mode = nicparams[constants.NIC_MODE]
12610 link = nicparams[constants.NIC_LINK]
12611 nics.append((nic.ip, nic.mac, mode, link))
12613 args["nics"] = nics
12615 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12616 if self.op.disk_template:
12617 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12618 if self.op.runtime_mem:
12619 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12621 return env
12623 def BuildHooksNodes(self):
12624 """Build hooks nodes.
12627 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12628 return (nl, nl)
12630 def _PrepareNicModification(self, params, private, old_ip, old_params,
12631 cluster, pnode):
12632 update_params_dict = dict([(key, params[key])
12633 for key in constants.NICS_PARAMETERS
12634 if key in params])
12636 if "bridge" in params:
12637 update_params_dict[constants.NIC_LINK] = params["bridge"]
12639 new_params = _GetUpdatedParams(old_params, update_params_dict)
12640 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12642 new_filled_params = cluster.SimpleFillNIC(new_params)
12643 objects.NIC.CheckParameterSyntax(new_filled_params)
12645 new_mode = new_filled_params[constants.NIC_MODE]
12646 if new_mode == constants.NIC_MODE_BRIDGED:
12647 bridge = new_filled_params[constants.NIC_LINK]
12648 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12649 if msg:
12650 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12651 if self.op.force:
12652 self.warn.append(msg)
12653 else:
12654 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12656 elif new_mode == constants.NIC_MODE_ROUTED:
12657 ip = params.get(constants.INIC_IP, old_ip)
12658 if ip is None:
12659 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12660 " on a routed NIC", errors.ECODE_INVAL)
12662 if constants.INIC_MAC in params:
12663 mac = params[constants.INIC_MAC]
12665 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12666 errors.ECODE_INVAL)
12667 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12668 # otherwise generate the MAC address
12669 params[constants.INIC_MAC] = \
12670 self.cfg.GenerateMAC(self.proc.GetECId())
12672 # or validate/reserve the current one
12674 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12675 except errors.ReservationError:
12676 raise errors.OpPrereqError("MAC address '%s' already in use"
12677 " in cluster" % mac,
12678 errors.ECODE_NOTUNIQUE)
12680 private.params = new_params
12681 private.filled = new_filled_params
12683 def CheckPrereq(self):
12684 """Check prerequisites.
12686 This only checks the instance list against the existing names.
12689 # checking the new params on the primary/secondary nodes
12691 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12692 cluster = self.cluster = self.cfg.GetClusterInfo()
12693 assert self.instance is not None, \
12694 "Cannot retrieve locked instance %s" % self.op.instance_name
12695 pnode = instance.primary_node
12696 nodelist = list(instance.all_nodes)
12697 pnode_info = self.cfg.GetNodeInfo(pnode)
12698 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12700 # Prepare disk/NIC modifications
12701 self.diskmod = PrepareContainerMods(self.op.disks, None)
12702 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12705 if self.op.os_name and not self.op.force:
12706 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12707 self.op.force_variant)
12708 instance_os = self.op.os_name
12710 instance_os = instance.os
12712 assert not (self.op.disk_template and self.op.disks), \
12713 "Can't modify disk template and apply disk changes at the same time"
12715 if self.op.disk_template:
12716 if instance.disk_template == self.op.disk_template:
12717 raise errors.OpPrereqError("Instance already has disk template %s" %
12718 instance.disk_template, errors.ECODE_INVAL)
12720 if (instance.disk_template,
12721 self.op.disk_template) not in self._DISK_CONVERSIONS:
12722 raise errors.OpPrereqError("Unsupported disk template conversion from"
12723 " %s to %s" % (instance.disk_template,
12724 self.op.disk_template),
12725 errors.ECODE_INVAL)
12726 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12727 msg="cannot change disk template")
12728 if self.op.disk_template in constants.DTS_INT_MIRROR:
12729 if self.op.remote_node == pnode:
12730 raise errors.OpPrereqError("Given new secondary node %s is the same"
12731 " as the primary node of the instance" %
12732 self.op.remote_node, errors.ECODE_STATE)
12733 _CheckNodeOnline(self, self.op.remote_node)
12734 _CheckNodeNotDrained(self, self.op.remote_node)
12735 # FIXME: here we assume that the old instance type is DT_PLAIN
12736 assert instance.disk_template == constants.DT_PLAIN
12737 disks = [{constants.IDISK_SIZE: d.size,
12738 constants.IDISK_VG: d.logical_id[0]}
12739 for d in instance.disks]
12740 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12741 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12743 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12744 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12745 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12747 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12748 ignore=self.op.ignore_ipolicy)
12749 if pnode_info.group != snode_info.group:
12750 self.LogWarning("The primary and secondary nodes are in two"
12751 " different node groups; the disk parameters"
12752 " from the first disk's node group will be"
12755 # hvparams processing
12756 if self.op.hvparams:
12757 hv_type = instance.hypervisor
12758 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12759 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12760 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12763 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12764 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12765 self.hv_proposed = self.hv_new = hv_new # the new actual values
12766 self.hv_inst = i_hvdict # the new dict (without defaults)
12767 else:
12768 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12769 instance.hvparams)
12770 self.hv_new = self.hv_inst = {}
12772 # beparams processing
12773 if self.op.beparams:
12774 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12775 use_none=True)
12776 objects.UpgradeBeParams(i_bedict)
12777 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12778 be_new = cluster.SimpleFillBE(i_bedict)
12779 self.be_proposed = self.be_new = be_new # the new actual values
12780 self.be_inst = i_bedict # the new dict (without defaults)
12781 else:
12782 self.be_new = self.be_inst = {}
12783 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12784 be_old = cluster.FillBE(instance)
12786 # CPU param validation -- checking every time a parameter is
12787 # changed to cover all cases where either CPU mask or vcpus have
12788 # been changed
12789 if (constants.BE_VCPUS in self.be_proposed and
12790 constants.HV_CPU_MASK in self.hv_proposed):
12791 cpu_list = \
12792 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12793 # Verify mask is consistent with number of vCPUs. Can skip this
12794 # test if only 1 entry in the CPU mask, which means same mask
12795 # is applied to all vCPUs.
12796 if (len(cpu_list) > 1 and
12797 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12798 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12799 " CPU mask [%s]" %
12800 (self.be_proposed[constants.BE_VCPUS],
12801 self.hv_proposed[constants.HV_CPU_MASK]),
12802 errors.ECODE_INVAL)
12804 # Only perform this test if a new CPU mask is given
12805 if constants.HV_CPU_MASK in self.hv_new:
12806 # Calculate the largest CPU number requested
12807 max_requested_cpu = max(map(max, cpu_list))
12808 # Check that all of the instance's nodes have enough physical CPUs to
12809 # satisfy the requested CPU mask
12810 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12811 max_requested_cpu + 1, instance.hypervisor)
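# Illustrative example (not part of the original code): a mask such as
# "0-1:2-3" parsed by utils.ParseMultiCpuMask yields [[0, 1], [2, 3]], i.e.
# one entry per vCPU; the highest CPU number requested is 3, so every node
# must expose at least 4 physical CPUs for the check above to pass.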
12813 # osparams processing
12814 if self.op.osparams:
12815 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12816 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12817 self.os_inst = i_osdict # the new dict (without defaults)
12818 else:
12819 self.os_inst = {}
12821 self.warn = []
12823 #TODO(dynmem): do the appropriate check involving MINMEM
12824 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12825 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12826 mem_check_list = [pnode]
12827 if be_new[constants.BE_AUTO_BALANCE]:
12828 # either we changed auto_balance to yes or it was from before
12829 mem_check_list.extend(instance.secondary_nodes)
12830 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12831 instance.hypervisor)
12832 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12833 [instance.hypervisor])
12834 pninfo = nodeinfo[pnode]
12835 msg = pninfo.fail_msg
12836 if msg:
12837 # Assume the primary node is unreachable and go ahead
12838 self.warn.append("Can't get info from primary node %s: %s" %
12839 (pnode, msg))
12840 else:
12841 (_, _, (pnhvinfo, )) = pninfo.payload
12842 if not isinstance(pnhvinfo.get("memory_free", None), int):
12843 self.warn.append("Node data from primary node %s doesn't contain"
12844 " free memory information" % pnode)
12845 elif instance_info.fail_msg:
12846 self.warn.append("Can't get instance runtime information: %s" %
12847 instance_info.fail_msg)
12848 else:
12849 if instance_info.payload:
12850 current_mem = int(instance_info.payload["memory"])
12851 else:
12852 # Assume instance not running
12853 # (there is a slight race condition here, but it's not very
12854 # probable, and we have no other way to check)
12855 # TODO: Describe race condition
12856 current_mem = 0
12857 #TODO(dynmem): do the appropriate check involving MINMEM
12858 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12859 pnhvinfo["memory_free"])
12860 if miss_mem > 0:
12861 raise errors.OpPrereqError("This change will prevent the instance"
12862 " from starting, due to %d MB of memory"
12863 " missing on its primary node" %
12864 miss_mem, errors.ECODE_NORES)
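# Worked example (editorial, hypothetical numbers): with a proposed maxmem of
# 4096 MB, a running instance currently using 1024 MB and 2048 MB reported
# free on the primary node, miss_mem = 4096 - 1024 - 2048 = 1024 MB > 0 and
# the change is refused with ECODE_NORES.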
12866 if be_new[constants.BE_AUTO_BALANCE]:
12867 for node, nres in nodeinfo.items():
12868 if node not in instance.secondary_nodes:
12869 continue
12870 nres.Raise("Can't get info from secondary node %s" % node,
12871 prereq=True, ecode=errors.ECODE_STATE)
12872 (_, _, (nhvinfo, )) = nres.payload
12873 if not isinstance(nhvinfo.get("memory_free", None), int):
12874 raise errors.OpPrereqError("Secondary node %s didn't return free"
12875 " memory information" % node,
12876 errors.ECODE_STATE)
12877 #TODO(dynmem): do the appropriate check involving MINMEM
12878 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12879 raise errors.OpPrereqError("This change will prevent the instance"
12880 " from failover to its secondary node"
12881 " %s, due to not enough memory" % node,
12882 errors.ECODE_STATE)
12884 if self.op.runtime_mem:
12885 remote_info = self.rpc.call_instance_info(instance.primary_node,
12886 instance.name,
12887 instance.hypervisor)
12888 remote_info.Raise("Error checking node %s" % instance.primary_node)
12889 if not remote_info.payload: # not running already
12890 raise errors.OpPrereqError("Instance %s is not running" %
12891 instance.name, errors.ECODE_STATE)
12893 current_memory = remote_info.payload["memory"]
12894 if (not self.op.force and
12895 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12896 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12897 raise errors.OpPrereqError("Instance %s must have between %d"
12898 " and %d MB of memory unless --force is"
12899 " given" %
12900 (instance.name,
12901 self.be_proposed[constants.BE_MINMEM],
12902 self.be_proposed[constants.BE_MAXMEM]),
12903 errors.ECODE_INVAL)
12905 if self.op.runtime_mem > current_memory:
12906 _CheckNodeFreeMemory(self, instance.primary_node,
12907 "ballooning memory for instance %s" %
12908 instance.name,
12909 self.op.runtime_mem - current_memory,
12910 instance.hypervisor)
12912 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12913 raise errors.OpPrereqError("Disk operations not supported for"
12914 " diskless instances", errors.ECODE_INVAL)
12916 def _PrepareNicCreate(_, params, private):
12917 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12918 return (None, None)
12920 def _PrepareNicMod(_, nic, params, private):
12921 self._PrepareNicModification(params, private, nic.ip,
12922 nic.nicparams, cluster, pnode)
12923 return (None, None)
12925 # Verify NIC changes (operating on copy)
12926 nics = instance.nics[:]
12927 ApplyContainerMods("NIC", nics, None, self.nicmod,
12928 _PrepareNicCreate, _PrepareNicMod, None)
12929 if len(nics) > constants.MAX_NICS:
12930 raise errors.OpPrereqError("Instance has too many network interfaces"
12931 " (%d), cannot add more" % constants.MAX_NICS,
12932 errors.ECODE_STATE)
12934 # Verify disk changes (operating on a copy)
12935 disks = instance.disks[:]
12936 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12937 if len(disks) > constants.MAX_DISKS:
12938 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12939 " more" % constants.MAX_DISKS,
12940 errors.ECODE_STATE)
12942 if self.op.offline is not None:
12943 if self.op.offline:
12944 msg = "can't change to offline"
12945 else:
12946 msg = "can't change to online"
12947 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12949 # Pre-compute NIC changes (necessary to use result in hooks)
12950 self._nic_chgdesc = []
12951 if self.nicmod:
12952 # Operate on copies as this is still in prereq
12953 nics = [nic.Copy() for nic in instance.nics]
12954 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12955 self._CreateNewNic, self._ApplyNicMods, None)
12956 self._new_nics = nics
12957 else:
12958 self._new_nics = None
12960 def _ConvertPlainToDrbd(self, feedback_fn):
12961 """Converts an instance from plain to drbd.
12964 feedback_fn("Converting template to drbd")
12965 instance = self.instance
12966 pnode = instance.primary_node
12967 snode = self.op.remote_node
12969 assert instance.disk_template == constants.DT_PLAIN
12971 # create a fake disk info for _GenerateDiskTemplate
12972 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12973 constants.IDISK_VG: d.logical_id[0]}
12974 for d in instance.disks]
12975 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12976 instance.name, pnode, [snode],
12977 disk_info, None, None, 0, feedback_fn,
12978 self.diskparams)
12979 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12980 self.diskparams)
12981 info = _GetInstanceInfoText(instance)
12982 feedback_fn("Creating additional volumes...")
12983 # first, create the missing data and meta devices
12984 for disk in anno_disks:
12985 # unfortunately this is... not too nice
12986 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12987 info, True)
12988 for child in disk.children:
12989 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12990 # at this stage, all new LVs have been created, we can rename the
12991 # old ones
12992 feedback_fn("Renaming original volumes...")
12993 rename_list = [(o, n.children[0].logical_id)
12994 for (o, n) in zip(instance.disks, new_disks)]
12995 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12996 result.Raise("Failed to rename original LVs")
12998 feedback_fn("Initializing DRBD devices...")
12999 # all child devices are in place, we can now create the DRBD devices
13000 for disk in anno_disks:
13001 for node in [pnode, snode]:
13002 f_create = node == pnode
13003 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13005 # at this point, the instance has been modified
13006 instance.disk_template = constants.DT_DRBD8
13007 instance.disks = new_disks
13008 self.cfg.Update(instance, feedback_fn)
13010 # Release node locks while waiting for sync
13011 _ReleaseLocks(self, locking.LEVEL_NODE)
13013 # disks are created, waiting for sync
13014 disk_abort = not _WaitForSync(self, instance,
13015 oneshot=not self.op.wait_for_sync)
13016 if disk_abort:
13017 raise errors.OpExecError("There are some degraded disks for"
13018 " this instance, please cleanup manually")
13020 # Node resource locks will be released by caller
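# Usage sketch (assumption, not part of this module): a conversion like the
# one above is normally requested through an opcode such as
#   opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                               disk_template=constants.DT_DRBD8,
#                               remote_node="node2.example.com")
# submitted to the master daemon, which dispatches to this logical unit.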
13022 def _ConvertDrbdToPlain(self, feedback_fn):
13023 """Converts an instance from drbd to plain.
13026 instance = self.instance
13028 assert len(instance.secondary_nodes) == 1
13029 assert instance.disk_template == constants.DT_DRBD8
13031 pnode = instance.primary_node
13032 snode = instance.secondary_nodes[0]
13033 feedback_fn("Converting template to plain")
13035 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13036 new_disks = [d.children[0] for d in instance.disks]
13038 # copy over size and mode
13039 for parent, child in zip(old_disks, new_disks):
13040 child.size = parent.size
13041 child.mode = parent.mode
13043 # this is a DRBD disk, return its port to the pool
13044 # NOTE: this must be done right before the call to cfg.Update!
13045 for disk in old_disks:
13046 tcp_port = disk.logical_id[2]
13047 self.cfg.AddTcpUdpPort(tcp_port)
13049 # update instance structure
13050 instance.disks = new_disks
13051 instance.disk_template = constants.DT_PLAIN
13052 self.cfg.Update(instance, feedback_fn)
13054 # Release locks in case removing disks takes a while
13055 _ReleaseLocks(self, locking.LEVEL_NODE)
13057 feedback_fn("Removing volumes on the secondary node...")
13058 for disk in old_disks:
13059 self.cfg.SetDiskID(disk, snode)
13060 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13061 if msg:
13062 self.LogWarning("Could not remove block device %s on node %s,"
13063 " continuing anyway: %s", disk.iv_name, snode, msg)
13065 feedback_fn("Removing unneeded volumes on the primary node...")
13066 for idx, disk in enumerate(old_disks):
13067 meta = disk.children[1]
13068 self.cfg.SetDiskID(meta, pnode)
13069 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13070 if msg:
13071 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13072 " continuing anyway: %s", idx, pnode, msg)
13074 def _CreateNewDisk(self, idx, params, _):
13075 """Creates a new disk.
13078 instance = self.instance
13081 if instance.disk_template in constants.DTS_FILEBASED:
13082 (file_driver, file_path) = instance.disks[0].logical_id
13083 file_path = os.path.dirname(file_path)
13084 else:
13085 file_driver = file_path = None
13087 disk = \
13088 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13089 instance.primary_node, instance.secondary_nodes,
13090 [params], file_path, file_driver, idx,
13091 self.Log, self.diskparams)[0]
13093 info = _GetInstanceInfoText(instance)
13095 logging.info("Creating volume %s for instance %s",
13096 disk.iv_name, instance.name)
13097 # Note: this needs to be kept in sync with _CreateDisks
13099 for node in instance.all_nodes:
13100 f_create = (node == instance.primary_node)
13101 try:
13102 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13103 except errors.OpExecError, err:
13104 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13105 disk.iv_name, disk, node, err)
13107 return (disk, [
13108 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13109 ])
13111 @staticmethod
13112 def _ModifyDisk(idx, disk, params, _):
13113 """Modifies a disk.
13116 disk.mode = params[constants.IDISK_MODE]
13118 return [
13119 ("disk.mode/%d" % idx, disk.mode),
13120 ]
13122 def _RemoveDisk(self, idx, root, _):
13123 """Removes a disk.
13125 """
13126 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13127 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13128 self.cfg.SetDiskID(disk, node)
13129 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13130 if msg:
13131 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13132 " continuing anyway", idx, node, msg)
13134 # if this is a DRBD disk, return its port to the pool
13135 if root.dev_type in constants.LDS_DRBD:
13136 self.cfg.AddTcpUdpPort(root.logical_id[2])
13138 @staticmethod
13139 def _CreateNewNic(idx, params, private):
13140 """Creates data structure for a new network interface.
13143 mac = params[constants.INIC_MAC]
13144 ip = params.get(constants.INIC_IP, None)
13145 nicparams = private.params
13147 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13148 ("nic.%d" % idx,
13149 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13150 (mac, ip, private.filled[constants.NIC_MODE],
13151 private.filled[constants.NIC_LINK])),
13152 ])
13154 @staticmethod
13155 def _ApplyNicMods(idx, nic, params, private):
13156 """Modifies a network interface.
13158 """
13160 changes = []
13161 for key in [constants.INIC_MAC, constants.INIC_IP]:
13162 if key in params:
13163 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13164 setattr(nic, key, params[key])
13166 if private.params:
13167 nic.nicparams = private.params
13169 for (key, val) in params.items():
13170 changes.append(("nic.%s/%d" % (key, idx), val))
13172 return changes
13174 def Exec(self, feedback_fn):
13175 """Modifies an instance.
13177 All parameters take effect only at the next restart of the instance.
13180 # Process here the warnings from CheckPrereq, as we don't have a
13181 # feedback_fn there.
13182 # TODO: Replace with self.LogWarning
13183 for warn in self.warn:
13184 feedback_fn("WARNING: %s" % warn)
13186 assert ((self.op.disk_template is None) ^
13187 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13188 "Not owning any node resource locks"
13190 result = []
13191 instance = self.instance
13193 # runtime memory
13194 if self.op.runtime_mem:
13195 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13196 instance,
13197 self.op.runtime_mem)
13198 rpcres.Raise("Cannot modify instance runtime memory")
13199 result.append(("runtime_memory", self.op.runtime_mem))
13201 # Apply disk changes
13202 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13203 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13204 _UpdateIvNames(0, instance.disks)
13206 if self.op.disk_template:
13207 if __debug__:
13208 check_nodes = set(instance.all_nodes)
13209 if self.op.remote_node:
13210 check_nodes.add(self.op.remote_node)
13211 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13212 owned = self.owned_locks(level)
13213 assert not (check_nodes - owned), \
13214 ("Not owning the correct locks, owning %r, expected at least %r" %
13215 (owned, check_nodes))
13217 r_shut = _ShutdownInstanceDisks(self, instance)
13218 if not r_shut:
13219 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13220 " proceed with disk template conversion")
13221 mode = (instance.disk_template, self.op.disk_template)
13222 try:
13223 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13224 except:
13225 self.cfg.ReleaseDRBDMinors(instance.name)
13226 raise
13227 result.append(("disk_template", self.op.disk_template))
13229 assert instance.disk_template == self.op.disk_template, \
13230 ("Expected disk template '%s', found '%s'" %
13231 (self.op.disk_template, instance.disk_template))
13233 # Release node and resource locks if there are any (they might already have
13234 # been released during disk conversion)
13235 _ReleaseLocks(self, locking.LEVEL_NODE)
13236 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13238 # Apply NIC changes
13239 if self._new_nics is not None:
13240 instance.nics = self._new_nics
13241 result.extend(self._nic_chgdesc)
13244 if self.op.hvparams:
13245 instance.hvparams = self.hv_inst
13246 for key, val in self.op.hvparams.iteritems():
13247 result.append(("hv/%s" % key, val))
13250 if self.op.beparams:
13251 instance.beparams = self.be_inst
13252 for key, val in self.op.beparams.iteritems():
13253 result.append(("be/%s" % key, val))
13256 if self.op.os_name:
13257 instance.os = self.op.os_name
13260 if self.op.osparams:
13261 instance.osparams = self.os_inst
13262 for key, val in self.op.osparams.iteritems():
13263 result.append(("os/%s" % key, val))
13265 if self.op.offline is None:
13266 # Ignore
13267 pass
13268 elif self.op.offline:
13269 # Mark instance as offline
13270 self.cfg.MarkInstanceOffline(instance.name)
13271 result.append(("admin_state", constants.ADMINST_OFFLINE))
13272 else:
13273 # Mark instance as online, but stopped
13274 self.cfg.MarkInstanceDown(instance.name)
13275 result.append(("admin_state", constants.ADMINST_DOWN))
13277 self.cfg.Update(instance, feedback_fn)
13279 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13280 self.owned_locks(locking.LEVEL_NODE)), \
13281 "All node locks should have been released by now"
13283 return result
13285 _DISK_CONVERSIONS = {
13286 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13287 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13288 }
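# Editorial note: only the two conversions listed above are supported.
# CheckPrereq rejects any (current, requested) template pair that is not a
# key of this mapping, and Exec dispatches via
#   self._DISK_CONVERSIONS[(instance.disk_template, self.op.disk_template)]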
13291 class LUInstanceChangeGroup(LogicalUnit):
13292 HPATH = "instance-change-group"
13293 HTYPE = constants.HTYPE_INSTANCE
13294 REQ_BGL = False
13296 def ExpandNames(self):
13297 self.share_locks = _ShareAll()
13298 self.needed_locks = {
13299 locking.LEVEL_NODEGROUP: [],
13300 locking.LEVEL_NODE: [],
13301 }
13303 self._ExpandAndLockInstance()
13305 if self.op.target_groups:
13306 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13307 self.op.target_groups)
13308 else:
13309 self.req_target_uuids = None
13311 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13313 def DeclareLocks(self, level):
13314 if level == locking.LEVEL_NODEGROUP:
13315 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13317 if self.req_target_uuids:
13318 lock_groups = set(self.req_target_uuids)
13320 # Lock all groups used by instance optimistically; this requires going
13321 # via the node before it's locked, requiring verification later on
13322 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13323 lock_groups.update(instance_groups)
13324 else:
13325 # No target groups, need to lock all of them
13326 lock_groups = locking.ALL_SET
13328 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13330 elif level == locking.LEVEL_NODE:
13331 if self.req_target_uuids:
13332 # Lock all nodes used by instances
13333 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13334 self._LockInstancesNodes()
13336 # Lock all nodes in all potential target groups
13337 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13338 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13339 member_nodes = [node_name
13340 for group in lock_groups
13341 for node_name in self.cfg.GetNodeGroup(group).members]
13342 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13343 else:
13344 # Lock all nodes as all groups are potential targets
13345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13347 def CheckPrereq(self):
13348 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13349 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13350 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13352 assert (self.req_target_uuids is None or
13353 owned_groups.issuperset(self.req_target_uuids))
13354 assert owned_instances == set([self.op.instance_name])
13356 # Get instance information
13357 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13359 # Check if node groups for locked instance are still correct
13360 assert owned_nodes.issuperset(self.instance.all_nodes), \
13361 ("Instance %s's nodes changed while we kept the lock" %
13362 self.op.instance_name)
13364 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13365 owned_groups, owned_nodes)
13367 if self.req_target_uuids:
13368 # User requested specific target groups
13369 self.target_uuids = frozenset(self.req_target_uuids)
13371 # All groups except those used by the instance are potential targets
13372 self.target_uuids = owned_groups - inst_groups
13374 conflicting_groups = self.target_uuids & inst_groups
13375 if conflicting_groups:
13376 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13377 " used by the instance '%s'" %
13378 (utils.CommaJoin(conflicting_groups),
13379 self.op.instance_name),
13380 errors.ECODE_INVAL)
13382 if not self.target_uuids:
13383 raise errors.OpPrereqError("There are no possible target groups",
13384 errors.ECODE_INVAL)
13386 def BuildHooksEnv(self):
13387 """Build hooks env.
13390 assert self.target_uuids
13392 env = {
13393 "TARGET_GROUPS": " ".join(self.target_uuids),
13394 }
13396 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13398 return env
13400 def BuildHooksNodes(self):
13401 """Build hooks nodes.
13404 mn = self.cfg.GetMasterNode()
13405 return ([mn], [mn])
13407 def Exec(self, feedback_fn):
13408 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13410 assert instances == [self.op.instance_name], "Instance not locked"
13412 req = iallocator.IAReqGroupChange(instances=instances,
13413 target_groups=list(self.target_uuids))
13414 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13416 ial.Run(self.op.iallocator)
13418 if not ial.success:
13419 raise errors.OpPrereqError("Can't compute solution for changing group of"
13420 " instance '%s' using iallocator '%s': %s" %
13421 (self.op.instance_name, self.op.iallocator,
13422 ial.info), errors.ECODE_NORES)
13424 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13426 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13427 " instance '%s'", len(jobs), self.op.instance_name)
13429 return ResultWithJobs(jobs)
13432 class LUBackupQuery(NoHooksLU):
13433 """Query the exports list
13438 def CheckArguments(self):
13439 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13440 ["node", "export"], self.op.use_locking)
13442 def ExpandNames(self):
13443 self.expq.ExpandNames(self)
13445 def DeclareLocks(self, level):
13446 self.expq.DeclareLocks(self, level)
13448 def Exec(self, feedback_fn):
13449 result = {}
13451 for (node, expname) in self.expq.OldStyleQuery(self):
13452 if expname is None:
13453 result[node] = False
13454 else:
13455 result.setdefault(node, []).append(expname)
13457 return result
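# Illustrative result shape (assumption based on the loop above):
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}
# where False marks a node whose export list could not be retrieved.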
13460 class _ExportQuery(_QueryBase):
13461 FIELDS = query.EXPORT_FIELDS
13463 #: The node name is not a unique key for this query
13464 SORT_FIELD = "node"
13466 def ExpandNames(self, lu):
13467 lu.needed_locks = {}
13469 # The following variables interact with _QueryBase._GetNames
13470 if self.names:
13471 self.wanted = _GetWantedNodes(lu, self.names)
13472 else:
13473 self.wanted = locking.ALL_SET
13475 self.do_locking = self.use_locking
13477 if self.do_locking:
13478 lu.share_locks = _ShareAll()
13479 lu.needed_locks = {
13480 locking.LEVEL_NODE: self.wanted,
13481 }
13483 def DeclareLocks(self, lu, level):
13484 pass
13486 def _GetQueryData(self, lu):
13487 """Computes the list of nodes and their attributes.
13490 # Locking is not used
13492 assert not (compat.any(lu.glm.is_owned(level)
13493 for level in locking.LEVELS
13494 if level != locking.LEVEL_CLUSTER) or
13495 self.do_locking or self.use_locking)
13497 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13499 result = []
13501 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13502 if nres.fail_msg:
13503 result.append((node, None))
13504 else:
13505 result.extend((node, expname) for expname in nres.payload)
13507 return result
13510 class LUBackupPrepare(NoHooksLU):
13511 """Prepares an instance for an export and returns useful information.
13516 def ExpandNames(self):
13517 self._ExpandAndLockInstance()
13519 def CheckPrereq(self):
13520 """Check prerequisites.
13523 instance_name = self.op.instance_name
13525 self.instance = self.cfg.GetInstanceInfo(instance_name)
13526 assert self.instance is not None, \
13527 "Cannot retrieve locked instance %s" % self.op.instance_name
13528 _CheckNodeOnline(self, self.instance.primary_node)
13530 self._cds = _GetClusterDomainSecret()
13532 def Exec(self, feedback_fn):
13533 """Prepares an instance for an export.
13536 instance = self.instance
13538 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13539 salt = utils.GenerateSecret(8)
13541 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13542 result = self.rpc.call_x509_cert_create(instance.primary_node,
13543 constants.RIE_CERT_VALIDITY)
13544 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13546 (name, cert_pem) = result.payload
13548 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13549 cert_pem)
13551 return {
13552 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13553 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13554 salt),
13555 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13556 }
13558 return None
13561 class LUBackupExport(LogicalUnit):
13562 """Export an instance to an image in the cluster.
13565 HPATH = "instance-export"
13566 HTYPE = constants.HTYPE_INSTANCE
13567 REQ_BGL = False
13569 def CheckArguments(self):
13570 """Check the arguments.
13573 self.x509_key_name = self.op.x509_key_name
13574 self.dest_x509_ca_pem = self.op.destination_x509_ca
13576 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13577 if not self.x509_key_name:
13578 raise errors.OpPrereqError("Missing X509 key name for encryption",
13579 errors.ECODE_INVAL)
13581 if not self.dest_x509_ca_pem:
13582 raise errors.OpPrereqError("Missing destination X509 CA",
13583 errors.ECODE_INVAL)
13585 def ExpandNames(self):
13586 self._ExpandAndLockInstance()
13588 # Lock all nodes for local exports
13589 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13590 # FIXME: lock only instance primary and destination node
13592 # Sad but true, for now we have to lock all nodes, as we don't know where
13593 # the previous export might be, and in this LU we search for it and
13594 # remove it from its current node. In the future we could fix this by:
13595 # - making a tasklet to search (share-lock all), then create the
13596 # new one, then one to remove, after
13597 # - removing the removal operation altogether
13598 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13600 def DeclareLocks(self, level):
13601 """Last minute lock declaration."""
13602 # All nodes are locked anyway, so nothing to do here.
13604 def BuildHooksEnv(self):
13605 """Build hooks env.
13607 This will run on the master, primary node and target node.
13609 """
13610 env = {
13611 "EXPORT_MODE": self.op.mode,
13612 "EXPORT_NODE": self.op.target_node,
13613 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13614 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13615 # TODO: Generic function for boolean env variables
13616 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13617 }
13619 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13621 return env
13623 def BuildHooksNodes(self):
13624 """Build hooks nodes.
13627 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13629 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13630 nl.append(self.op.target_node)
13632 return nl
13634 def CheckPrereq(self):
13635 """Check prerequisites.
13637 This checks that the instance and node names are valid.
13640 instance_name = self.op.instance_name
13642 self.instance = self.cfg.GetInstanceInfo(instance_name)
13643 assert self.instance is not None, \
13644 "Cannot retrieve locked instance %s" % self.op.instance_name
13645 _CheckNodeOnline(self, self.instance.primary_node)
13647 if (self.op.remove_instance and
13648 self.instance.admin_state == constants.ADMINST_UP and
13649 not self.op.shutdown):
13650 raise errors.OpPrereqError("Cannot remove instance without shutting it"
13651 " down first", errors.ECODE_STATE)
13653 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13654 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13655 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13656 assert self.dst_node is not None
13658 _CheckNodeOnline(self, self.dst_node.name)
13659 _CheckNodeNotDrained(self, self.dst_node.name)
13661 self._cds = None
13662 self.dest_disk_info = None
13663 self.dest_x509_ca = None
13665 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13666 self.dst_node = None
13668 if len(self.op.target_node) != len(self.instance.disks):
13669 raise errors.OpPrereqError(("Received destination information for %s"
13670 " disks, but instance %s has %s disks") %
13671 (len(self.op.target_node), instance_name,
13672 len(self.instance.disks)),
13673 errors.ECODE_INVAL)
13675 cds = _GetClusterDomainSecret()
13677 # Check X509 key name
13678 try:
13679 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13680 except (TypeError, ValueError), err:
13681 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13682 errors.ECODE_INVAL)
13684 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13685 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13686 errors.ECODE_INVAL)
13688 # Load and verify CA
13689 try:
13690 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13691 except OpenSSL.crypto.Error, err:
13692 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13693 (err, ), errors.ECODE_INVAL)
13695 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13696 if errcode is not None:
13697 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13698 (msg, ), errors.ECODE_INVAL)
13700 self.dest_x509_ca = cert
13702 # Verify target information
13703 disk_info = []
13704 for idx, disk_data in enumerate(self.op.target_node):
13705 try:
13706 (host, port, magic) = \
13707 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13708 except errors.GenericError, err:
13709 raise errors.OpPrereqError("Target info for disk %s: %s" %
13710 (idx, err), errors.ECODE_INVAL)
13712 disk_info.append((host, port, magic))
13714 assert len(disk_info) == len(self.op.target_node)
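# Editorial note: for remote exports each entry in disk_info is a
# (host, port, magic) tuple, one per instance disk, decoded from the signed
# per-disk data in self.op.target_node by CheckRemoteExportDiskInfo above.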
13715 self.dest_disk_info = disk_info
13717 else:
13718 raise errors.ProgrammerError("Unhandled export mode %r" %
13719 self.op.mode)
13721 # instance disk type verification
13722 # TODO: Implement export support for file-based disks
13723 for disk in self.instance.disks:
13724 if disk.dev_type == constants.LD_FILE:
13725 raise errors.OpPrereqError("Export not supported for instances with"
13726 " file-based disks", errors.ECODE_INVAL)
13728 def _CleanupExports(self, feedback_fn):
13729 """Removes exports of current instance from all other nodes.
13731 If an instance in a cluster with nodes A..D was exported to node C, its
13732 exports will be removed from the nodes A, B and D.
13735 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13737 nodelist = self.cfg.GetNodeList()
13738 nodelist.remove(self.dst_node.name)
13740 # on one-node clusters nodelist will be empty after the removal
13741 # if we proceed the backup would be removed because OpBackupQuery
13742 # substitutes an empty list with the full cluster node list.
13743 iname = self.instance.name
13745 feedback_fn("Removing old exports for instance %s" % iname)
13746 exportlist = self.rpc.call_export_list(nodelist)
13747 for node in exportlist:
13748 if exportlist[node].fail_msg:
13749 continue
13750 if iname in exportlist[node].payload:
13751 msg = self.rpc.call_export_remove(node, iname).fail_msg
13752 if msg:
13753 self.LogWarning("Could not remove older export for instance %s"
13754 " on node %s: %s", iname, node, msg)
13756 def Exec(self, feedback_fn):
13757 """Export an instance to an image in the cluster.
13760 assert self.op.mode in constants.EXPORT_MODES
13762 instance = self.instance
13763 src_node = instance.primary_node
13765 if self.op.shutdown:
13766 # shutdown the instance, but not the disks
13767 feedback_fn("Shutting down instance %s" % instance.name)
13768 result = self.rpc.call_instance_shutdown(src_node, instance,
13769 self.op.shutdown_timeout)
13770 # TODO: Maybe ignore failures if ignore_remove_failures is set
13771 result.Raise("Could not shutdown instance %s on"
13772 " node %s" % (instance.name, src_node))
13774 # set the disks ID correctly since call_instance_start needs the
13775 # correct drbd minor to create the symlinks
13776 for disk in instance.disks:
13777 self.cfg.SetDiskID(disk, src_node)
13779 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13781 if activate_disks:
13782 # Activate the instance disks if we're exporting a stopped instance
13783 feedback_fn("Activating disks for %s" % instance.name)
13784 _StartInstanceDisks(self, instance, None)
13786 try:
13787 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13788 instance)
13790 helper.CreateSnapshots()
13791 try:
13792 if (self.op.shutdown and
13793 instance.admin_state == constants.ADMINST_UP and
13794 not self.op.remove_instance):
13795 assert not activate_disks
13796 feedback_fn("Starting instance %s" % instance.name)
13797 result = self.rpc.call_instance_start(src_node,
13798 (instance, None, None), False)
13799 msg = result.fail_msg
13800 if msg:
13801 feedback_fn("Failed to start instance: %s" % msg)
13802 _ShutdownInstanceDisks(self, instance)
13803 raise errors.OpExecError("Could not start instance: %s" % msg)
13805 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13806 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13807 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13808 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13809 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13811 (key_name, _, _) = self.x509_key_name
13813 dest_ca_pem = \
13814 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13815 self.dest_x509_ca)
13817 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13818 key_name, dest_ca_pem,
13819 timeouts)
13820 finally:
13821 helper.Cleanup()
13823 # Check for backwards compatibility
13824 assert len(dresults) == len(instance.disks)
13825 assert compat.all(isinstance(i, bool) for i in dresults), \
13826 "Not all results are boolean: %r" % dresults
13828 finally:
13829 if activate_disks:
13830 feedback_fn("Deactivating disks for %s" % instance.name)
13831 _ShutdownInstanceDisks(self, instance)
13833 if not (compat.all(dresults) and fin_resu):
13834 failures = []
13835 if not fin_resu:
13836 failures.append("export finalization")
13837 if not compat.all(dresults):
13838 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13839 if not dsk)
13840 failures.append("disk export: disk(s) %s" % fdsk)
13842 raise errors.OpExecError("Export failed, errors in %s" %
13843 utils.CommaJoin(failures))
13845 # At this point, the export was successful, we can cleanup/finish
13847 # Remove instance if requested
13848 if self.op.remove_instance:
13849 feedback_fn("Removing instance %s" % instance.name)
13850 _RemoveInstance(self, feedback_fn, instance,
13851 self.op.ignore_remove_failures)
13853 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13854 self._CleanupExports(feedback_fn)
13856 return fin_resu, dresults
13859 class LUBackupRemove(NoHooksLU):
13860 """Remove exports related to the named instance.
13865 def ExpandNames(self):
13866 self.needed_locks = {}
13867 # We need all nodes to be locked in order for RemoveExport to work, but we
13868 # don't need to lock the instance itself, as nothing will happen to it (and
13869 # we can remove exports also for a removed instance)
13870 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13872 def Exec(self, feedback_fn):
13873 """Remove any export.
13876 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13877 # If the instance was not found we'll try with the name that was passed in.
13878 # This will only work if it was an FQDN, though.
13879 fqdn_warn = False
13880 if not instance_name:
13881 fqdn_warn = True
13882 instance_name = self.op.instance_name
13884 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13885 exportlist = self.rpc.call_export_list(locked_nodes)
13886 found = False
13887 for node in exportlist:
13888 msg = exportlist[node].fail_msg
13889 if msg:
13890 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13891 continue
13892 if instance_name in exportlist[node].payload:
13893 found = True
13894 result = self.rpc.call_export_remove(node, instance_name)
13895 msg = result.fail_msg
13896 if msg:
13898 " on node %s: %s", instance_name, node, msg)
13900 if fqdn_warn and not found:
13901 feedback_fn("Export not found. If trying to remove an export belonging"
13902 " to a deleted instance please use its Fully Qualified"
13903 " Domain Name.")
13906 class LUGroupAdd(LogicalUnit):
13907 """Logical unit for creating node groups.
13910 HPATH = "group-add"
13911 HTYPE = constants.HTYPE_GROUP
13913 REQ_BGL = False
13914 def ExpandNames(self):
13915 # We need the new group's UUID here so that we can create and acquire the
13916 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13917 # that it should not check whether the UUID exists in the configuration.
13918 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13919 self.needed_locks = {}
13920 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13922 def CheckPrereq(self):
13923 """Check prerequisites.
13925 This checks that the given group name is not an existing node group
13926 already.
13928 """
13929 try:
13930 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13931 except errors.OpPrereqError:
13932 pass
13933 else:
13934 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13935 " node group (UUID: %s)" %
13936 (self.op.group_name, existing_uuid),
13937 errors.ECODE_EXISTS)
13939 if self.op.ndparams:
13940 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13942 if self.op.hv_state:
13943 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13944 else:
13945 self.new_hv_state = None
13947 if self.op.disk_state:
13948 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13949 else:
13950 self.new_disk_state = None
13952 if self.op.diskparams:
13953 for templ in constants.DISK_TEMPLATES:
13954 if templ in self.op.diskparams:
13955 utils.ForceDictType(self.op.diskparams[templ],
13956 constants.DISK_DT_TYPES)
13957 self.new_diskparams = self.op.diskparams
13958 try:
13959 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13960 except errors.OpPrereqError, err:
13961 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13962 errors.ECODE_INVAL)
13963 else:
13964 self.new_diskparams = {}
13966 if self.op.ipolicy:
13967 cluster = self.cfg.GetClusterInfo()
13968 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13969 try:
13970 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13971 except errors.ConfigurationError, err:
13972 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13973 errors.ECODE_INVAL)
13975 def BuildHooksEnv(self):
13976 """Build hooks env.
13978 """
13979 return {
13980 "GROUP_NAME": self.op.group_name,
13981 }
13983 def BuildHooksNodes(self):
13984 """Build hooks nodes.
13987 mn = self.cfg.GetMasterNode()
13988 return ([mn], [mn])
13990 def Exec(self, feedback_fn):
13991 """Add the node group to the cluster.
13994 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13995 uuid=self.group_uuid,
13996 alloc_policy=self.op.alloc_policy,
13997 ndparams=self.op.ndparams,
13998 diskparams=self.new_diskparams,
13999 ipolicy=self.op.ipolicy,
14000 hv_state_static=self.new_hv_state,
14001 disk_state_static=self.new_disk_state)
14003 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14004 del self.remove_locks[locking.LEVEL_NODEGROUP]
14007 class LUGroupAssignNodes(NoHooksLU):
14008 """Logical unit for assigning nodes to groups.
14013 def ExpandNames(self):
14014 # These raise errors.OpPrereqError on their own:
14015 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14016 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14018 # We want to lock all the affected nodes and groups. We have readily
14019 # available the list of nodes, and the *destination* group. To gather the
14020 # list of "source" groups, we need to fetch node information later on.
14021 self.needed_locks = {
14022 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14023 locking.LEVEL_NODE: self.op.nodes,
14024 }
14026 def DeclareLocks(self, level):
14027 if level == locking.LEVEL_NODEGROUP:
14028 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14030 # Try to get all affected nodes' groups without having the group or node
14031 # lock yet. Needs verification later in the code flow.
14032 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14034 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14036 def CheckPrereq(self):
14037 """Check prerequisites.
14040 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14041 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14042 frozenset(self.op.nodes))
14044 expected_locks = (set([self.group_uuid]) |
14045 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14046 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14047 if actual_locks != expected_locks:
14048 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14049 " current groups are '%s', used to be '%s'" %
14050 (utils.CommaJoin(expected_locks),
14051 utils.CommaJoin(actual_locks)))
14053 self.node_data = self.cfg.GetAllNodesInfo()
14054 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14055 instance_data = self.cfg.GetAllInstancesInfo()
14057 if self.group is None:
14058 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14059 (self.op.group_name, self.group_uuid))
14061 (new_splits, previous_splits) = \
14062 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14063 for node in self.op.nodes],
14064 self.node_data, instance_data)
14066 if new_splits:
14067 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14069 if not self.op.force:
14070 raise errors.OpExecError("The following instances get split by this"
14071 " change and --force was not given: %s" %
14072 fmt_new_splits)
14073 else:
14074 self.LogWarning("This operation will split the following instances: %s",
14075 fmt_new_splits)
14077 if previous_splits:
14078 self.LogWarning("In addition, these already-split instances continue"
14079 " to be split across groups: %s",
14080 utils.CommaJoin(utils.NiceSort(previous_splits)))
14082 def Exec(self, feedback_fn):
14083 """Assign nodes to a new group.
14086 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14088 self.cfg.AssignGroupNodes(mods)
14090 @staticmethod
14091 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14092 """Check for split instances after a node assignment.
14094 This method considers a series of node assignments as an atomic operation,
14095 and returns information about split instances after applying the set of
14096 changes.
14098 In particular, it returns information about newly split instances, and
14099 instances that were already split, and remain so after the change.
14101 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14102 considered.
14104 @type changes: list of (node_name, new_group_uuid) pairs.
14105 @param changes: list of node assignments to consider.
14106 @param node_data: a dict with data for all nodes
14107 @param instance_data: a dict with all instances to consider
14108 @rtype: a two-tuple
14109 @return: a list of instances that were previously whole and end up split as a
14110 consequence of this change, and a list of instances that were previously
14111 split and that this change does not fix.
14114 changed_nodes = dict((node, group) for node, group in changes
14115 if node_data[node].group != group)
14117 all_split_instances = set()
14118 previously_split_instances = set()
14120 def InstanceNodes(instance):
14121 return [instance.primary_node] + list(instance.secondary_nodes)
14123 for inst in instance_data.values():
14124 if inst.disk_template not in constants.DTS_INT_MIRROR:
14125 continue
14127 instance_nodes = InstanceNodes(inst)
14129 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14130 previously_split_instances.add(inst.name)
14132 if len(set(changed_nodes.get(node, node_data[node].group)
14133 for node in instance_nodes)) > 1:
14134 all_split_instances.add(inst.name)
14136 return (list(all_split_instances - previously_split_instances),
14137 list(previously_split_instances & all_split_instances))
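# Worked example (editorial): moving node "n2" into this group while a DRBD
# instance keeps its primary on "n1" (in another group) makes that instance
# newly split, so it appears in the first returned list and --force is
# required; instances already spanning two groups end up in the second list.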
14140 class _GroupQuery(_QueryBase):
14141 FIELDS = query.GROUP_FIELDS
14143 def ExpandNames(self, lu):
14144 lu.needed_locks = {}
14146 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14147 self._cluster = lu.cfg.GetClusterInfo()
14148 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14150 if not self.names:
14151 self.wanted = [name_to_uuid[name]
14152 for name in utils.NiceSort(name_to_uuid.keys())]
14153 else:
14154 # Accept names to be either names or UUIDs.
14155 missing = []
14156 self.wanted = []
14157 all_uuid = frozenset(self._all_groups.keys())
14159 for name in self.names:
14160 if name in all_uuid:
14161 self.wanted.append(name)
14162 elif name in name_to_uuid:
14163 self.wanted.append(name_to_uuid[name])
14164 else:
14165 missing.append(name)
14167 if missing:
14168 raise errors.OpPrereqError("Some groups do not exist: %s" %
14169 utils.CommaJoin(missing),
14170 errors.ECODE_NOENT)
14172 def DeclareLocks(self, lu, level):
14173 pass
14175 def _GetQueryData(self, lu):
14176 """Computes the list of node groups and their attributes.
14179 do_nodes = query.GQ_NODE in self.requested_data
14180 do_instances = query.GQ_INST in self.requested_data
14182 group_to_nodes = None
14183 group_to_instances = None
14185 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14186 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14187 # latter GetAllInstancesInfo() is not enough, for we have to go through
14188 # instance->node. Hence, we will need to process nodes even if we only need
14189 # instance information.
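# Illustrative shapes (assumption): group_to_nodes ends up as
#   {group_uuid: [node_name, ...]}
# and group_to_instances as
#   {group_uuid: [instance_name, ...]}
# keyed by the group UUIDs selected in self.wanted.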
14190 if do_nodes or do_instances:
14191 all_nodes = lu.cfg.GetAllNodesInfo()
14192 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14193 node_to_group = {}
14195 for node in all_nodes.values():
14196 if node.group in group_to_nodes:
14197 group_to_nodes[node.group].append(node.name)
14198 node_to_group[node.name] = node.group
14200 if do_instances:
14201 all_instances = lu.cfg.GetAllInstancesInfo()
14202 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14204 for instance in all_instances.values():
14205 node = instance.primary_node
14206 if node in node_to_group:
14207 group_to_instances[node_to_group[node]].append(instance.name)
14209 if not do_nodes:
14210 # Do not pass on node information if it was not requested.
14211 group_to_nodes = None
14213 return query.GroupQueryData(self._cluster,
14214 [self._all_groups[uuid]
14215 for uuid in self.wanted],
14216 group_to_nodes, group_to_instances,
14217 query.GQ_DISKPARAMS in self.requested_data)
14220 class LUGroupQuery(NoHooksLU):
14221 """Logical unit for querying node groups.
14226 def CheckArguments(self):
14227 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14228 self.op.output_fields, False)
14230 def ExpandNames(self):
14231 self.gq.ExpandNames(self)
14233 def DeclareLocks(self, level):
14234 self.gq.DeclareLocks(self, level)
14236 def Exec(self, feedback_fn):
14237 return self.gq.OldStyleQuery(self)
14240 class LUGroupSetParams(LogicalUnit):
14241 """Modifies the parameters of a node group.
14244 HPATH = "group-modify"
14245 HTYPE = constants.HTYPE_GROUP
14246 REQ_BGL = False
14248 def CheckArguments(self):
14249 all_changes = [
14250 self.op.ndparams,
14251 self.op.diskparams,
14252 self.op.alloc_policy,
14253 self.op.hv_state,
14254 self.op.disk_state,
14255 self.op.ipolicy,
14256 ]
14258 if all_changes.count(None) == len(all_changes):
14259 raise errors.OpPrereqError("Please pass at least one modification",
14260 errors.ECODE_INVAL)
14262 def ExpandNames(self):
14263 # This raises errors.OpPrereqError on its own:
14264 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14266 self.needed_locks = {
14267 locking.LEVEL_INSTANCE: [],
14268 locking.LEVEL_NODEGROUP: [self.group_uuid],
14269 }
14271 self.share_locks[locking.LEVEL_INSTANCE] = 1
14273 def DeclareLocks(self, level):
14274 if level == locking.LEVEL_INSTANCE:
14275 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14277 # Lock instances optimistically, needs verification once group lock has
14278 # been acquired
14279 self.needed_locks[locking.LEVEL_INSTANCE] = \
14280 self.cfg.GetNodeGroupInstances(self.group_uuid)
14282 @staticmethod
14283 def _UpdateAndVerifyDiskParams(old, new):
14284 """Updates and verifies disk parameters.
14287 new_params = _GetUpdatedParams(old, new)
14288 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14289 return new_params
14291 def CheckPrereq(self):
14292 """Check prerequisites.
14295 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14297 # Check if locked instances are still correct
14298 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14300 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14301 cluster = self.cfg.GetClusterInfo()
14303 if self.group is None:
14304 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14305 (self.op.group_name, self.group_uuid))
14307 if self.op.ndparams:
14308 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14309 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14310 self.new_ndparams = new_ndparams
14312 if self.op.diskparams:
14313 diskparams = self.group.diskparams
14314 uavdp = self._UpdateAndVerifyDiskParams
14315 # For each disktemplate subdict update and verify the values
14316 new_diskparams = dict((dt,
14317 uavdp(diskparams.get(dt, {}),
14318 self.op.diskparams[dt]))
14319 for dt in constants.DISK_TEMPLATES
14320 if dt in self.op.diskparams)
14321 # As we've all subdicts of diskparams ready, lets merge the actual
14322 # dict with all updated subdicts
14323 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14324 try:
14325 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14326 except errors.OpPrereqError, err:
14327 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14328 errors.ECODE_INVAL)
14328 errors.ECODE_INVAL)
14330 if self.op.hv_state:
14331 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14332 self.group.hv_state_static)
14334 if self.op.disk_state:
14335 self.new_disk_state = \
14336 _MergeAndVerifyDiskState(self.op.disk_state,
14337 self.group.disk_state_static)
14339 if self.op.ipolicy:
14340 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14341 self.op.ipolicy,
14342 group_policy=True)
14344 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14345 inst_filter = lambda inst: inst.name in owned_instances
14346 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14347 gmi = ganeti.masterd.instance
14348 violations = \
14349 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14350 self.group),
14351 new_ipolicy, instances)
14353 if violations:
14354 self.LogWarning("After the ipolicy change the following instances"
14355 " violate them: %s",
14356 utils.CommaJoin(violations))
14358 def BuildHooksEnv(self):
14359 """Build hooks env.
14361 """
14362 return {
14363 "GROUP_NAME": self.op.group_name,
14364 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14365 }
14367 def BuildHooksNodes(self):
14368 """Build hooks nodes.
14371 mn = self.cfg.GetMasterNode()
14372 return ([mn], [mn])
14374 def Exec(self, feedback_fn):
14375 """Modifies the node group.
14377 """
14378 result = []
14380 if self.op.ndparams:
14381 self.group.ndparams = self.new_ndparams
14382 result.append(("ndparams", str(self.group.ndparams)))
14384 if self.op.diskparams:
14385 self.group.diskparams = self.new_diskparams
14386 result.append(("diskparams", str(self.group.diskparams)))
14388 if self.op.alloc_policy:
14389 self.group.alloc_policy = self.op.alloc_policy
14391 if self.op.hv_state:
14392 self.group.hv_state_static = self.new_hv_state
14394 if self.op.disk_state:
14395 self.group.disk_state_static = self.new_disk_state
14397 if self.op.ipolicy:
14398 self.group.ipolicy = self.new_ipolicy
14400 self.cfg.Update(self.group, feedback_fn)
14402 return result
14404 class LUGroupRemove(LogicalUnit):
14405 HPATH = "group-remove"
14406 HTYPE = constants.HTYPE_GROUP
14407 REQ_BGL = False
14409 def ExpandNames(self):
14410 # This raises errors.OpPrereqError on its own:
14411 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14412 self.needed_locks = {
14413 locking.LEVEL_NODEGROUP: [self.group_uuid],
14414 }
14416 def CheckPrereq(self):
14417 """Check prerequisites.
14419 This checks that the given group name exists as a node group, that it is
14420 empty (i.e., contains no nodes), and that it is not the last group of the
14421 cluster.
14423 """
14424 # Verify that the group is empty.
14425 group_nodes = [node.name
14426 for node in self.cfg.GetAllNodesInfo().values()
14427 if node.group == self.group_uuid]
14429 if group_nodes:
14430 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14431 " nodes: %s" %
14432 (self.op.group_name,
14433 utils.CommaJoin(utils.NiceSort(group_nodes))),
14434 errors.ECODE_STATE)
14436 # Verify the cluster would not be left group-less.
14437 if len(self.cfg.GetNodeGroupList()) == 1:
14438 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14439 " removed" % self.op.group_name,
14440 errors.ECODE_STATE)
14442 def BuildHooksEnv(self):
14443 """Build hooks env.
14445 """
14446 return {
14447 "GROUP_NAME": self.op.group_name,
14448 }
14450 def BuildHooksNodes(self):
14451 """Build hooks nodes.
14454 mn = self.cfg.GetMasterNode()
14455 return ([mn], [mn])
14457 def Exec(self, feedback_fn):
14458 """Remove the node group.
14460 """
14461 try:
14462 self.cfg.RemoveNodeGroup(self.group_uuid)
14463 except errors.ConfigurationError:
14464 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14465 (self.op.group_name, self.group_uuid))
14467 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14470 class LUGroupRename(LogicalUnit):
14471 HPATH = "group-rename"
14472 HTYPE = constants.HTYPE_GROUP
14473 REQ_BGL = False
14475 def ExpandNames(self):
14476 # This raises errors.OpPrereqError on its own:
14477 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14479 self.needed_locks = {
14480 locking.LEVEL_NODEGROUP: [self.group_uuid],
14481 }
14483 def CheckPrereq(self):
14484 """Check prerequisites.
14486 Ensures requested new name is not yet used.
14488 """
14489 try:
14490 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14491 except errors.OpPrereqError:
14492 pass
14493 else:
14494 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14495 " node group (UUID: %s)" %
14496 (self.op.new_name, new_name_uuid),
14497 errors.ECODE_EXISTS)
14499 def BuildHooksEnv(self):
14500 """Build hooks env.
14502 """
14503 return {
14504 "OLD_NAME": self.op.group_name,
14505 "NEW_NAME": self.op.new_name,
14506 }
14508 def BuildHooksNodes(self):
14509 """Build hooks nodes.
14512 mn = self.cfg.GetMasterNode()
14514 all_nodes = self.cfg.GetAllNodesInfo()
14515 all_nodes.pop(mn, None)
14517 run_nodes = [mn]
14518 run_nodes.extend(node.name for node in all_nodes.values()
14519 if node.group == self.group_uuid)
14521 return (run_nodes, run_nodes)
14523 def Exec(self, feedback_fn):
14524 """Rename the node group.
14527 group = self.cfg.GetNodeGroup(self.group_uuid)
14529 if group is None:
14530 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14531 (self.op.group_name, self.group_uuid))
14533 group.name = self.op.new_name
14534 self.cfg.Update(group, feedback_fn)
14536 return self.op.new_name
14539 class LUGroupEvacuate(LogicalUnit):
14540 HPATH = "group-evacuate"
14541 HTYPE = constants.HTYPE_GROUP
14542 REQ_BGL = False
14544 def ExpandNames(self):
14545 # This raises errors.OpPrereqError on its own:
14546 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14548 if self.op.target_groups:
14549 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14550 self.op.target_groups)
14551 else:
14552 self.req_target_uuids = []
14554 if self.group_uuid in self.req_target_uuids:
14555 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14556 " as a target group (targets are %s)" %
14557 (self.op.group_name,
14558 utils.CommaJoin(self.req_target_uuids)),
14559 errors.ECODE_INVAL)
14561 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14563 self.share_locks = _ShareAll()
14564 self.needed_locks = {
14565 locking.LEVEL_INSTANCE: [],
14566 locking.LEVEL_NODEGROUP: [],
14567 locking.LEVEL_NODE: [],
14568 }
  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
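
    # Turn the iallocator result into jobs, each job being a list of opcodes.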
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU):  # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      lock_level = None
      lock_name = None
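
    # Cluster-wide tags take no per-object lock (see the FIXME below).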
    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
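
    # Scan the tags of every object and collect (path, tag) pairs matching
    # the compiled pattern.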
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
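
    # Every tag requested for removal must currently be set on the object;
    # otherwise the whole operation is refused.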
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
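
  # Notification protocol: the LU creates a Unix socket, announces its path
  # through a job log entry (ELOG_JQUEUE_TEST), waits for the test client to
  # connect, and then waits again until the client confirms before continuing.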
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
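
  # The counters below let Exec verify that the expected LU life cycle
  # (CheckArguments first, then ExpandNames) was followed.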
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
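
    # With direction "in" only the allocator request text is generated; with
    # "out" the allocator is actually run and its output is returned.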
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
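
# Every resource that can be queried through an opcode must have an
# implementation registered above.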
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)