code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _MakeLegacyNodeInfo(data):
 589   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 590
 591   Converts the data into a single dictionary. This is fine for most use cases,
 592   but some require information from more than one volume group or hypervisor.
 593
 594   """
 595   (bootid, (vg_info, ), (hv_info, )) = data
 596
 597   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 598     "bootid": bootid,
 599     })
 600
 601
 602 def _AnnotateDiskParams(instance, devs, cfg):
 603   """Little helper wrapper to the rpc annotation method.
 604
 605   @param instance: The instance object
 606   @type devs: List of L{objects.Disk}
 607   @param devs: The root devices (not any of its children!)
 608   @param cfg: The config object
 609   @returns The annotated disk copies
 610   @see L{rpc.AnnotateDiskParams}
 611
 612   """
 613   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 614                                 cfg.GetInstanceDiskParams(instance))
 615
 616
 617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 618                               cur_group_uuid):
 619   """Checks if node groups for locked instances are still correct.
 620
 621   @type cfg: L{config.ConfigWriter}
 622   @param cfg: Cluster configuration
 623   @type instances: dict; string as key, L{objects.Instance} as value
 624   @param instances: Dictionary, instance name as key, instance object as value
 625   @type owned_groups: iterable of string
 626   @param owned_groups: List of owned groups
 627   @type owned_nodes: iterable of string
 628   @param owned_nodes: List of owned nodes
 629   @type cur_group_uuid: string or None
 630   @param cur_group_uuid: Optional group UUID to check against instance's groups
 631
 632   """
 633   for (name, inst) in instances.items():
 634     assert owned_nodes.issuperset(inst.all_nodes), \
 635       "Instance %s's nodes changed while we kept the lock" % name
 636
 637     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 638
 639     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 640       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 641
 642
 643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 644   """Checks if the owned node groups are still correct for an instance.
 645
 646   @type cfg: L{config.ConfigWriter}
 647   @param cfg: The cluster configuration
 648   @type instance_name: string
 649   @param instance_name: Instance name
 650   @type owned_groups: set or frozenset
 651   @param owned_groups: List of currently owned node groups
 652
 653   """
 654   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 655
 656   if not owned_groups.issuperset(inst_groups):
 657     raise errors.OpPrereqError("Instance %s's node groups changed since"
 658                                " locks were acquired, current groups are"
 659                                " are '%s', owning groups '%s'; retry the"
 660                                " operation" %
 661                                (instance_name,
 662                                 utils.CommaJoin(inst_groups),
 663                                 utils.CommaJoin(owned_groups)),
 664                                errors.ECODE_STATE)
 665
 666   return inst_groups
 667
 668
 669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 670   """Checks if the instances in a node group are still correct.
 671
 672   @type cfg: L{config.ConfigWriter}
 673   @param cfg: The cluster configuration
 674   @type group_uuid: string
 675   @param group_uuid: Node group UUID
 676   @type owned_instances: set or frozenset
 677   @param owned_instances: List of currently owned instances
 678
 679   """
 680   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 681   if owned_instances != wanted_instances:
 682     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 683                                " locks were acquired, wanted '%s', have '%s';"
 684                                " retry the operation" %
 685                                (group_uuid,
 686                                 utils.CommaJoin(wanted_instances),
 687                                 utils.CommaJoin(owned_instances)),
 688                                errors.ECODE_STATE)
 689
 690   return wanted_instances
 691
 692
 693 def _SupportsOob(cfg, node):
 694   """Tells if node supports OOB.
 695
 696   @type cfg: L{config.ConfigWriter}
 697   @param cfg: The cluster configuration
 698   @type node: L{objects.Node}
 699   @param node: The node
 700   @return: The OOB script if supported or an empty string otherwise
 701
 702   """
 703   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 704
 705
 706 def _GetWantedNodes(lu, nodes):
 707   """Returns list of checked and expanded node names.
 708
 709   @type lu: L{LogicalUnit}
 710   @param lu: the logical unit on whose behalf we execute
 711   @type nodes: list
 712   @param nodes: list of node names or None for all nodes
 713   @rtype: list
 714   @return: the list of nodes, sorted
 715   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 716
 717   """
 718   if nodes:
 719     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 720
 721   return utils.NiceSort(lu.cfg.GetNodeList())
 722
 723
 724 def _GetWantedInstances(lu, instances):
 725   """Returns list of checked and expanded instance names.
 726
 727   @type lu: L{LogicalUnit}
 728   @param lu: the logical unit on whose behalf we execute
 729   @type instances: list
 730   @param instances: list of instance names or None for all instances
 731   @rtype: list
 732   @return: the list of instances, sorted
 733   @raise errors.OpPrereqError: if the instances parameter is wrong type
 734   @raise errors.OpPrereqError: if any of the passed instances is not found
 735
 736   """
 737   if instances:
 738     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 739   else:
 740     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 741   return wanted
 742
 743
 744 def _GetUpdatedParams(old_params, update_dict,
 745                       use_default=True, use_none=False):
 746   """Return the new version of a parameter dictionary.
 747
 748   @type old_params: dict
 749   @param old_params: old parameters
 750   @type update_dict: dict
 751   @param update_dict: dict containing new parameter values, or
 752       constants.VALUE_DEFAULT to reset the parameter to its default
 753       value
 754   @param use_default: boolean
 755   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 756       values as 'to be deleted' values
 757   @param use_none: boolean
 758   @type use_none: whether to recognise C{None} values as 'to be
 759       deleted' values
 760   @rtype: dict
 761   @return: the new parameter dictionary
 762
 763   """
 764   params_copy = copy.deepcopy(old_params)
 765   for key, val in update_dict.iteritems():
 766     if ((use_default and val == constants.VALUE_DEFAULT) or
 767         (use_none and val is None)):
 768       try:
 769         del params_copy[key]
 770       except KeyError:
 771         pass
 772     else:
 773       params_copy[key] = val
 774   return params_copy
 775
 776
 777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 778   """Return the new version of a instance policy.
 779
 780   @param group_policy: whether this policy applies to a group and thus
 781     we should support removal of policy entries
 782
 783   """
 784   use_none = use_default = group_policy
 785   ipolicy = copy.deepcopy(old_ipolicy)
 786   for key, value in new_ipolicy.items():
 787     if key not in constants.IPOLICY_ALL_KEYS:
 788       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 789                                  errors.ECODE_INVAL)
 790     if key in constants.IPOLICY_ISPECS:
 791       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 792       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 793                                        use_none=use_none,
 794                                        use_default=use_default)
 795     else:
 796       if (not value or value == [constants.VALUE_DEFAULT] or
 797           value == constants.VALUE_DEFAULT):
 798         if group_policy:
 799           del ipolicy[key]
 800         else:
 801           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 802                                      " on the cluster'" % key,
 803                                      errors.ECODE_INVAL)
 804       else:
 805         if key in constants.IPOLICY_PARAMETERS:
 806           # FIXME: we assume all such values are float
 807           try:
 808             ipolicy[key] = float(value)
 809           except (TypeError, ValueError), err:
 810             raise errors.OpPrereqError("Invalid value for attribute"
 811                                        " '%s': '%s', error: %s" %
 812                                        (key, value, err), errors.ECODE_INVAL)
 813         else:
 814           # FIXME: we assume all others are lists; this should be redone
 815           # in a nicer way
 816           ipolicy[key] = list(value)
 817   try:
 818     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 819   except errors.ConfigurationError, err:
 820     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 821                                errors.ECODE_INVAL)
 822   return ipolicy
 823
 824
 825 def _UpdateAndVerifySubDict(base, updates, type_check):
 826   """Updates and verifies a dict with sub dicts of the same type.
 827
 828   @param base: The dict with the old data
 829   @param updates: The dict with the new data
 830   @param type_check: Dict suitable to ForceDictType to verify correct types
 831   @returns: A new dict with updated and verified values
 832
 833   """
 834   def fn(old, value):
 835     new = _GetUpdatedParams(old, value)
 836     utils.ForceDictType(new, type_check)
 837     return new
 838
 839   ret = copy.deepcopy(base)
 840   ret.update(dict((key, fn(base.get(key, {}), value))
 841                   for key, value in updates.items()))
 842   return ret
 843
 844
 845 def _MergeAndVerifyHvState(op_input, obj_input):
 846   """Combines the hv state from an opcode with the one of the object
 847
 848   @param op_input: The input dict from the opcode
 849   @param obj_input: The input dict from the objects
 850   @return: The verified and updated dict
 851
 852   """
 853   if op_input:
 854     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 855     if invalid_hvs:
 856       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 857                                  " %s" % utils.CommaJoin(invalid_hvs),
 858                                  errors.ECODE_INVAL)
 859     if obj_input is None:
 860       obj_input = {}
 861     type_check = constants.HVSTS_PARAMETER_TYPES
 862     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 863
 864   return None
 865
 866
 867 def _MergeAndVerifyDiskState(op_input, obj_input):
 868   """Combines the disk state from an opcode with the one of the object
 869
 870   @param op_input: The input dict from the opcode
 871   @param obj_input: The input dict from the objects
 872   @return: The verified and updated dict
 873   """
 874   if op_input:
 875     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 876     if invalid_dst:
 877       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 878                                  utils.CommaJoin(invalid_dst),
 879                                  errors.ECODE_INVAL)
 880     type_check = constants.DSS_PARAMETER_TYPES
 881     if obj_input is None:
 882       obj_input = {}
 883     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 884                                               type_check))
 885                 for key, value in op_input.items())
 886
 887   return None
 888
 889
 890 def _ReleaseLocks(lu, level, names=None, keep=None):
 891   """Releases locks owned by an LU.
 892
 893   @type lu: L{LogicalUnit}
 894   @param level: Lock level
 895   @type names: list or None
 896   @param names: Names of locks to release
 897   @type keep: list or None
 898   @param keep: Names of locks to retain
 899
 900   """
 901   assert not (keep is not None and names is not None), \
 902          "Only one of the 'names' and the 'keep' parameters can be given"
 903
 904   if names is not None:
 905     should_release = names.__contains__
 906   elif keep:
 907     should_release = lambda name: name not in keep
 908   else:
 909     should_release = None
 910
 911   owned = lu.owned_locks(level)
 912   if not owned:
 913     # Not owning any lock at this level, do nothing
 914     pass
 915
 916   elif should_release:
 917     retain = []
 918     release = []
 919
 920     # Determine which locks to release
 921     for name in owned:
 922       if should_release(name):
 923         release.append(name)
 924       else:
 925         retain.append(name)
 926
 927     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 928
 929     # Release just some locks
 930     lu.glm.release(level, names=release)
 931
 932     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 933   else:
 934     # Release everything
 935     lu.glm.release(level)
 936
 937     assert not lu.glm.is_owned(level), "No locks should be owned"
 938
 939
 940 def _MapInstanceDisksToNodes(instances):
 941   """Creates a map from (node, volume) to instance name.
 942
 943   @type instances: list of L{objects.Instance}
 944   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 945
 946   """
 947   return dict(((node, vol), inst.name)
 948               for inst in instances
 949               for (node, vols) in inst.MapLVsByNode().items()
 950               for vol in vols)
 951
 952
 953 def _RunPostHook(lu, node_name):
 954   """Runs the post-hook for an opcode on a single node.
 955
 956   """
 957   hm = lu.proc.BuildHooksManager(lu)
 958   try:
 959     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 960   except:
 961     # pylint: disable=W0702
 962     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 963
 964
 965 def _CheckOutputFields(static, dynamic, selected):
 966   """Checks whether all selected fields are valid.
 967
 968   @type static: L{utils.FieldSet}
 969   @param static: static fields set
 970   @type dynamic: L{utils.FieldSet}
 971   @param dynamic: dynamic fields set
 972
 973   """
 974   f = utils.FieldSet()
 975   f.Extend(static)
 976   f.Extend(dynamic)
 977
 978   delta = f.NonMatching(selected)
 979   if delta:
 980     raise errors.OpPrereqError("Unknown output fields selected: %s"
 981                                % ",".join(delta), errors.ECODE_INVAL)
 982
 983
 984 def _CheckGlobalHvParams(params):
 985   """Validates that given hypervisor params are not global ones.
 986
 987   This will ensure that instances don't get customised versions of
 988   global params.
 989
 990   """
 991   used_globals = constants.HVC_GLOBALS.intersection(params)
 992   if used_globals:
 993     msg = ("The following hypervisor parameters are global and cannot"
 994            " be customized at instance level, please modify them at"
 995            " cluster level: %s" % utils.CommaJoin(used_globals))
 996     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 997
 998
 999 def _CheckNodeOnline(lu, node, msg=None):
1000   """Ensure that a given node is online.
1001
1002   @param lu: the LU on behalf of which we make the check
1003   @param node: the node to check
1004   @param msg: if passed, should be a message to replace the default one
1005   @raise errors.OpPrereqError: if the node is offline
1006
1007   """
1008   if msg is None:
1009     msg = "Can't use offline node"
1010   if lu.cfg.GetNodeInfo(node).offline:
1011     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1012
1013
1014 def _CheckNodeNotDrained(lu, node):
1015   """Ensure that a given node is not drained.
1016
1017   @param lu: the LU on behalf of which we make the check
1018   @param node: the node to check
1019   @raise errors.OpPrereqError: if the node is drained
1020
1021   """
1022   if lu.cfg.GetNodeInfo(node).drained:
1023     raise errors.OpPrereqError("Can't use drained node %s" % node,
1024                                errors.ECODE_STATE)
1025
1026
1027 def _CheckNodeVmCapable(lu, node):
1028   """Ensure that a given node is vm capable.
1029
1030   @param lu: the LU on behalf of which we make the check
1031   @param node: the node to check
1032   @raise errors.OpPrereqError: if the node is not vm capable
1033
1034   """
1035   if not lu.cfg.GetNodeInfo(node).vm_capable:
1036     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1037                                errors.ECODE_STATE)
1038
1039
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  The OS is looked up on the node via RPC; unless C{force_variant} is
  set, the variant in C{os_name} is additionally validated against the
  returned OS object.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return

  _CheckOSVariant(os_result.payload, os_name)
1056
1057
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  The node itself is asked, via RPC, whether it owns the address.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_check = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_check.Raise("Failure checking secondary ip on node %s" % node,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)

  if ip_check.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1083
1084
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the result of reading C{constants.CLUSTER_DOMAIN_SECRET_FILE}
      as a one-line file in strict mode

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1091
1092
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  Besides the configured admin state, if C{constants.ADMINST_UP} is not
  among the required states, the primary node is queried to make sure the
  instance is not actually running there.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  admin_state = instance.admin_state
  if admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1118
1119
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  A bound that is missing from the policy for C{name} defaults to the
  value itself, i.e. it never triggers a violation.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None if the value is C{None}, C{constants.VALUE_AUTO} or within
      range; otherwise a string describing the violation

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  if not (value > upper or lower > value):
    return None

  fqn = name
  if qualifier:
    fqn = "%s/%s" % (name, qualifier)

  return ("%s value %s is not in range [%s, %s]" %
          (fqn, value, lower, upper))
1144
1145
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Each spec value is checked on its own; every disk size is checked
  individually, qualified by its index.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ]
  for (idx, size) in enumerate(disk_sizes):
    checks.append((constants.ISPEC_DISK_SIZE, str(idx), size))

  violations = []
  for (name, qualifier, value) in checks:
    outcome = _compute_fn(name, qualifier, ipolicy, value)
    # Only keep real findings (the compute function reports "fine" as None)
    if outcome:
      violations.append(outcome)

  return violations
1183
1184
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  The specs are taken from the instance's own backend parameters, disks
  and NICs; backend parameters that are not set are passed on as C{None}.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  be_params = instance.beparams
  mem_size = be_params.get(constants.BE_MAXMEM)
  cpu_count = be_params.get(constants.BE_VCPUS)
  spindle_use = be_params.get(constants.BE_SPINDLE_USE)

  disks = instance.disks
  disk_sizes = [disk.size for disk in disks]

  return _compute_fn(ipolicy, mem_size, cpu_count, len(disks),
                     len(instance.nics), disk_sizes, spindle_use)
1206
1207
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Missing spec entries default to C{None} (memory, cpu, spindle use),
  C{0} (disk and nic count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  mem_size = spec(constants.ISPEC_MEM_SIZE)
  cpu_count = spec(constants.ISPEC_CPU_COUNT)
  disk_count = spec(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = spec(constants.ISPEC_DISK_SIZE, [])
  nic_count = spec(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = spec(constants.ISPEC_SPINDLE_USE)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
1229
1230
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  If the instance stays within its current group, no check is made and
  no violations are reported.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []

  return _compute_fn(ipolicy, instance)
1248
1249
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy (they are only logged
      as a warning)
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if the policy is violated and C{ignore}
      is not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  current_node = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, current_node.group, node.group)

  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  lu.LogWarning(msg)
1272
1273
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: The set difference between the instances violating the new
      ipolicy and those violating the old one, i.e. the newly violating
      instances (as computed by L{_ComputeViolatingInstances})

  """
  violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
  violating_old = _ComputeViolatingInstances(old_ipolicy, instances)

  return violating_new - violating_old
1286
1287
1288 def _ExpandItemName(fn, name, kind):
1289   """Expand an item name.
1290
1291   @param fn: the function to use for expansion
1292   @param name: requested item name
1293   @param kind: text description ('Node' or 'Instance')
1294   @return: the resolved (full) name
1295   @raise errors.OpPrereqError: if the item is not found
1296
1297   """
1298   full_name = fn(name)
1299   if full_name is None:
1300     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1301                                errors.ECODE_NOENT)
1302   return full_name
1303
1304
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  Uses the configuration's C{ExpandNodeName} as the expansion function.

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
1308
1309
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  Uses the configuration's C{ExpandInstanceName} as the expansion function.

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1313
1314
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316                           minmem, maxmem, vcpus, nics, disk_template, disks,
1317                           bep, hvp, hypervisor_name, tags):
1318   """Builds instance related env variables for hooks
1319
1320   This builds the hook environment from individual variables.
1321
1322   @type name: string
1323   @param name: the name of the instance
1324   @type primary_node: string
1325   @param primary_node: the name of the instance's primary node
1326   @type secondary_nodes: list
1327   @param secondary_nodes: list of secondary nodes as strings
1328   @type os_type: string
1329   @param os_type: the name of the instance's OS
1330   @type status: string
1331   @param status: the desired status of the instance
1332   @type minmem: string
1333   @param minmem: the minimum memory size of the instance
1334   @type maxmem: string
1335   @param maxmem: the maximum memory size of the instance
1336   @type vcpus: string
1337   @param vcpus: the count of VCPUs the instance has
1338   @type nics: list
1339   @param nics: list of tuples (ip, mac, mode, link) representing
1340       the NICs the instance has
1341   @type disk_template: string
1342   @param disk_template: the disk template of the instance
1343   @type disks: list
1344   @param disks: the list of (size, mode) pairs
1345   @type bep: dict
1346   @param bep: the backend parameters for the instance
1347   @type hvp: dict
1348   @param hvp: the hypervisor parameters for the instance
1349   @type hypervisor_name: string
1350   @param hypervisor_name: the hypervisor for the instance
1351   @type tags: list
1352   @param tags: list of instance tags as strings
1353   @rtype: dict
1354   @return: the hook environment for this instance
1355
1356   """
1357   env = {
1358     "OP_TARGET": name,
1359     "INSTANCE_NAME": name,
1360     "INSTANCE_PRIMARY": primary_node,
1361     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362     "INSTANCE_OS_TYPE": os_type,
1363     "INSTANCE_STATUS": status,
1364     "INSTANCE_MINMEM": minmem,
1365     "INSTANCE_MAXMEM": maxmem,
1366     # TODO(2.7) remove deprecated "memory" value
1367     "INSTANCE_MEMORY": maxmem,
1368     "INSTANCE_VCPUS": vcpus,
1369     "INSTANCE_DISK_TEMPLATE": disk_template,
1370     "INSTANCE_HYPERVISOR": hypervisor_name,
1371   }
1372   if nics:
1373     nic_count = len(nics)
1374     for idx, (ip, mac, mode, link) in enumerate(nics):
1375       if ip is None:
1376         ip = ""
1377       env["INSTANCE_NIC%d_IP" % idx] = ip
1378       env["INSTANCE_NIC%d_MAC" % idx] = mac
1379       env["INSTANCE_NIC%d_MODE" % idx] = mode
1380       env["INSTANCE_NIC%d_LINK" % idx] = link
1381       if mode == constants.NIC_MODE_BRIDGED:
1382         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1383   else:
1384     nic_count = 0
1385
1386   env["INSTANCE_NIC_COUNT"] = nic_count
1387
1388   if disks:
1389     disk_count = len(disks)
1390     for idx, (size, mode) in enumerate(disks):
1391       env["INSTANCE_DISK%d_SIZE" % idx] = size
1392       env["INSTANCE_DISK%d_MODE" % idx] = mode
1393   else:
1394     disk_count = 0
1395
1396   env["INSTANCE_DISK_COUNT"] = disk_count
1397
1398   if not tags:
1399     tags = []
1400
1401   env["INSTANCE_TAGS"] = " ".join(tags)
1402
1403   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404     for key, value in source.items():
1405       env["INSTANCE_%s_%s" % (kind, key)] = value
1406
1407   return env
1408
1409
1410 def _NICListToTuple(lu, nics):
1411   """Build a list of nic information tuples.
1412
1413   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414   value in LUInstanceQueryData.
1415
1416   @type lu:  L{LogicalUnit}
1417   @param lu: the logical unit on whose behalf we execute
1418   @type nics: list of L{objects.NIC}
1419   @param nics: list of nics to convert to hooks tuples
1420
1421   """
1422   hooks_nics = []
1423   cluster = lu.cfg.GetClusterInfo()
1424   for nic in nics:
1425     ip = nic.ip
1426     mac = nic.mac
1427     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428     mode = filled_params[constants.NIC_MODE]
1429     link = filled_params[constants.NIC_LINK]
1430     hooks_nics.append((ip, mac, mode, link))
1431   return hooks_nics
1432
1433
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the
  instance, using the backend and hypervisor parameters as filled by the
  cluster.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )

  # Caller-provided values win over the ones derived from the object
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1472
1473
1474 def _AdjustCandidatePool(lu, exceptions):
1475   """Adjust the candidate pool after node operations.
1476
1477   """
1478   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1479   if mod_list:
1480     lu.LogInfo("Promoted nodes to master candidate role: %s",
1481                utils.CommaJoin(node.name for node in mod_list))
1482     for name in mod_list:
1483       lu.context.ReaddNode(name)
1484   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1485   if mc_now > mc_max:
1486     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1487                (mc_now, mc_max))
1488
1489
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491   """Decide whether I should promote myself as a master candidate.
1492
1493   """
1494   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496   # the new node will increase mc_max with one, so:
1497   mc_should = min(mc_should + 1, cp_size)
1498   return mc_now < mc_should
1499
1500
1501 def _CalculateGroupIPolicy(cluster, group):
1502   """Calculate instance policy for group.
1503
1504   """
1505   return cluster.SimpleFillIPolicy(group.ipolicy)
1506
1507
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509   """Computes a set of instances who violates given ipolicy.
1510
1511   @param ipolicy: The ipolicy to verify
1512   @type instances: object.Instance
1513   @param instances: List of instances to verify
1514   @return: A frozenset of instance names violating the ipolicy
1515
1516   """
1517   return frozenset([inst.name for inst in instances
1518                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1519
1520
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522   """Check that the brigdes needed by a list of nics exist.
1523
1524   """
1525   cluster = lu.cfg.GetClusterInfo()
1526   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527   brlist = [params[constants.NIC_LINK] for params in paramslist
1528             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529   if brlist:
1530     result = lu.rpc.call_bridges_exist(target_node, brlist)
1531     result.Raise("Error checking bridges on destination node '%s'" %
1532                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1533
1534
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose nics are checked
  @param node: the node to check on; defaults to the instance's primary
      node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1542
1543
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  An OS without supported variants must be named without a variant; an
  OS with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name does not conform

  """
  variant = objects.OS.GetVariant(name)

  if os_obj.supported_variants:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)

    if variant not in os_obj.supported_variants:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1566
1567
1568 def _GetNodeInstancesInner(cfg, fn):
1569   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1570
1571
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  An instance is selected if the node is among its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1578
1579
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  An instance is selected if the node is its C{primary_node}.

  """
  def _IsPrimaryHere(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
1586
1587
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  An instance is selected if the node is among its C{secondary_nodes}.

  """
  def _IsSecondaryHere(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
1594
1595
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration object
  @param storage_type: the storage type to get the arguments for
  @rtype: list
  @return: for file storage a one-element list holding the list of
      storage directories, for any other type an empty list

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1606
1607
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty on a node.

  The mirror status of all disks is requested from the node; a disk is
  considered faulty if a status was returned for it and its local disk
  status is C{constants.LDS_FAULTY}.

  @param cfg: the configuration object, used to set the disk IDs for the
      node
  @param rpc_runner: the RPC runner used to query the node
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's C{Raise}, selecting the kind
      of error raised if the query fails
  @rtype: list
  @return: indices (into the instance's disk list) of the faulty disks

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  status = rpc_runner.call_blockdev_getmirrorstatus(node_name,
                                                    (instance.disks, instance))
  status.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  return [idx for (idx, bdev_status) in enumerate(status.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
1624
1625
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627   """Check the sanity of iallocator and node arguments and use the
1628   cluster-wide iallocator if appropriate.
1629
1630   Check that at most one of (iallocator, node) is specified. If none is
1631   specified, then the LU's opcode's iallocator slot is filled with the
1632   cluster-wide default iallocator.
1633
1634   @type iallocator_slot: string
1635   @param iallocator_slot: the name of the opcode iallocator slot
1636   @type node_slot: string
1637   @param node_slot: the name of the opcode target node slot
1638
1639   """
1640   node = getattr(lu.op, node_slot, None)
1641   iallocator = getattr(lu.op, iallocator_slot, None)
1642
1643   if node is not None and iallocator is not None:
1644     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1645                                errors.ECODE_INVAL)
1646   elif node is None and iallocator is None:
1647     default_iallocator = lu.cfg.GetDefaultIAllocator()
1648     if default_iallocator:
1649       setattr(lu.op, iallocator_slot, default_iallocator)
1650     else:
1651       raise errors.OpPrereqError("No iallocator or node given and no"
1652                                  " cluster-wide default iallocator found;"
1653                                  " please specify either an iallocator or a"
1654                                  " node, or set a cluster-wide default"
1655                                  " iallocator")
1656
1657
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659   """Decides on which iallocator to use.
1660
1661   @type cfg: L{config.ConfigWriter}
1662   @param cfg: Cluster configuration object
1663   @type iallocator: string or None
1664   @param iallocator: Iallocator specified in opcode
1665   @rtype: string
1666   @return: Iallocator name
1667
1668   """
1669   if not iallocator:
1670     # Use default iallocator
1671     iallocator = cfg.GetDefaultIAllocator()
1672
1673   if not iallocator:
1674     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675                                " opcode nor as a cluster-wide default",
1676                                errors.ECODE_INVAL)
1677
1678   return iallocator
1679
1680
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU does no work of its own in L{Exec}; it only supplies the hook
  environment and the hook node lists.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only C{OP_TARGET}, set to the cluster name

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty list and a list containing only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: not used
    @return: always C{True}

    """
    return True
1707
1708
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: environment holding just the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build the hooks node lists.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster may only be destroyed once it is empty, i.e. the master
    is the only node left and no instances are configured anymore.

    @raise errors.OpPrereqError: if other nodes or any instances remain

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return

    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and asks it to deactivate the
    master IP address; a failure of the latter is only logged as a
    warning.

    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_ext_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_ext_script)
    fail_msg = result.fail_msg
    if fail_msg:
      self.LogWarning("Error disabling the master IP address: %s", fail_msg)

    return master_name
1768
1769
1770 def _VerifyCertificate(filename):
1771   """Verifies a certificate for L{LUClusterVerifyConfig}.
1772
1773   @type filename: string
1774   @param filename: Path to PEM file
1775
1776   """
1777   try:
1778     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779                                            utils.ReadFile(filename))
1780   except Exception, err: # pylint: disable=W0703
1781     return (LUClusterVerifyConfig.ETYPE_ERROR,
1782             "Failed to load X509 certificate %s: %s" % (filename, err))
1783
1784   (errcode, msg) = \
1785     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786                                 constants.SSL_CERT_EXPIRATION_ERROR)
1787
1788   if msg:
1789     fnamemsg = "While verifying %s: %s" % (filename, msg)
1790   else:
1791     fnamemsg = None
1792
1793   if errcode is None:
1794     return (None, fnamemsg)
1795   elif errcode == utils.CERT_WARNING:
1796     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797   elif errcode == utils.CERT_ERROR:
1798     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1799
1800   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1801
1802
def _GetAllHypervisorParameters(cluster, instances):
  """Collect every set of hypervisor parameters known to the cluster.

  The result contains, in this order, the cluster defaults of all enabled
  hypervisors, the per-OS parameters (only for non-empty overrides) and
  the filled parameters of all instances that define parameters of their
  own.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        # nothing overridden for this OS/hypervisor combination
        continue
      hvp_data.append(("os %s" % os_name, hv_name,
                       cluster.GetHVDefaults(hv_name, os_name=os_name)))

  # TODO: collapse identical parameter values in a single one
  hvp_data.extend(("instance %s" % inst.name, inst.hypervisor,
                   cluster.FillHV(inst))
                  for inst in instances
                  if inst.hvparams)

  return hvp_data
1833
1834
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message and report it.

    Depending on the opcode's error_codes parameter the message is
    formatted either as a parseable, colon-separated record or as a
    simpler error string. The severity is taken from the keyword argument
    named by L{ETYPE_FIELD} and defaults to L{ETYPE_ERROR}.

    This must be called only from Exec and functions called from Exec.

    @param ecode: error code, a tuple of (item type, error text, unused)
    @param item: the name of the affected item, can be empty or None
    @param msg: the message, used as format string if C{args} are given

    """
    (itype, etxt, _) = ecode
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)

    # first complete the message itself
    text = msg
    if args:
      text = msg % args

    # then build the whole line
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      line = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, text)
    else:
      item_part = ""
      if item:
        item_part = " " + item
      line = "%s: %s%s: %s" % (severity, itype, item_part, text)

    # and finally hand it over to the feedback function
    self._feedback_fn("  - %s" % line) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The condition is also considered true if the opcode requests error
    simulation. Error codes listed in the opcode's ignore_errors are
    reported as warnings; only errors, not warnings, are recorded in
    self.bad.

    """
    failed = (bool(cond)
              or self.op.debug_simulate_errors) # pylint: disable=E1101

    # Ignored error codes are demoted to warnings
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if failed:
      self._Error(ecode, *args, **kwargs)

    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity != self.ETYPE_ERROR:
      # warnings must not mark the operation as failed
      return

    self.bad = self.bad or failed
1892
1893
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that this LU itself does not need any locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the verification jobs.

    If the opcode names a group, a single job verifying that group is
    created. Otherwise a job verifying the global configuration comes
    first, followed by one job per node group, each of them depending on
    the configuration job.

    @param feedback_fn: not used here
    @return: L{ResultWithJobs} wrapping the list of jobs, every job being
        a list with one opcode

    """
    jobs = []
    ignore_errors = self.op.ignore_errors
    single_group = self.op.group_name

    if single_group:
      groups = [single_group]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=ignore_errors)
        ])

    for group in groups:
      if single_group:
        depends = None
      else:
        # Always depend on global verification; the offset (presumably a
        # relative reference back to the configuration job) must be
        # computed per group as "jobs" grows with every job added
        depends = [(-len(jobs), [])]

      jobs.append([opcodes.OpClusterVerifyGroup(group_name=group,
                                                ignore_errors=ignore_errors,
                                                depends=depends)])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # only opcodes other than the group verification may lack the
          # "skip_checks" attribute
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1936
1937
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939   """Verifies the cluster config.
1940
1941   """
1942   REQ_BGL = False
1943
1944   def _VerifyHVP(self, hvp_data):
1945     """Verifies locally the syntax of the hypervisor parameters.
1946
1947     """
1948     for item, hv_name, hv_params in hvp_data:
1949       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1950              (item, hv_name))
1951       try:
1952         hv_class = hypervisor.GetHypervisor(hv_name)
1953         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954         hv_class.CheckParameterSyntax(hv_params)
1955       except errors.GenericError, err:
1956         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1957
1958   def ExpandNames(self):
1959     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960     self.share_locks = _ShareAll()
1961
1962   def CheckPrereq(self):
1963     """Check prerequisites.
1964
1965     """
1966     # Retrieve all information
1967     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968     self.all_node_info = self.cfg.GetAllNodesInfo()
1969     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1970
1971   def Exec(self, feedback_fn):
1972     """Verify integrity of cluster, performing various test on nodes.
1973
1974     """
1975     self.bad = False
1976     self._feedback_fn = feedback_fn
1977
1978     feedback_fn("* Verifying cluster config")
1979
1980     for msg in self.cfg.VerifyConfig():
1981       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1982
1983     feedback_fn("* Verifying cluster certificate files")
1984
1985     for cert_filename in constants.ALL_CERT_FILES:
1986       (errcode, msg) = _VerifyCertificate(cert_filename)
1987       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1988
1989     feedback_fn("* Verifying hypervisor parameters")
1990
1991     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992                                                 self.all_inst_info.values()))
1993
1994     feedback_fn("* Verifying all nodes belong to an existing group")
1995
1996     # We do this verification here because, should this bogus circumstance
1997     # occur, it would never be caught by VerifyGroup, which only acts on
1998     # nodes/instances reachable from existing node groups.
1999
2000     dangling_nodes = set(node.name for node in self.all_node_info.values()
2001                          if node.group not in self.all_group_info)
2002
2003     dangling_instances = {}
2004     no_node_instances = []
2005
2006     for inst in self.all_inst_info.values():
2007       if inst.primary_node in dangling_nodes:
2008         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009       elif inst.primary_node not in self.all_node_info:
2010         no_node_instances.append(inst.name)
2011
2012     pretty_dangling = [
2013         "%s (%s)" %
2014         (node.name,
2015          utils.CommaJoin(dangling_instances.get(node.name,
2016                                                 ["no instances"])))
2017         for node in dangling_nodes]
2018
2019     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2020                   None,
2021                   "the following nodes (and their instances) belong to a non"
2022                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2023
2024     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2025                   None,
2026                   "the following instances have a non-existing primary-node:"
2027                   " %s", utils.CommaJoin(no_node_instances))
2028
2029     return not self.bad
2030
2031
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033   """Verifies the status of a node group.
2034
2035   """
2036   HPATH = "cluster-verify"
2037   HTYPE = constants.HTYPE_CLUSTER
2038   REQ_BGL = False
2039
2040   _HOOKS_INDENT_RE = re.compile("^", re.M)
2041
2042   class NodeImage(object):
2043     """A class representing the logical and physical status of a node.
2044
2045     @type name: string
2046     @ivar name: the node name to which this object refers
2047     @ivar volumes: a structure as returned from
2048         L{ganeti.backend.GetVolumeList} (runtime)
2049     @ivar instances: a list of running instances (runtime)
2050     @ivar pinst: list of configured primary instances (config)
2051     @ivar sinst: list of configured secondary instances (config)
2052     @ivar sbp: dictionary of {primary-node: list of instances} for all
2053         instances for which this node is secondary (config)
2054     @ivar mfree: free memory, as reported by hypervisor (runtime)
2055     @ivar dfree: free disk, as reported by the node (runtime)
2056     @ivar offline: the offline status (config)
2057     @type rpc_fail: boolean
2058     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2059         not whether the individual keys were correct) (runtime)
2060     @type lvm_fail: boolean
2061     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062     @type hyp_fail: boolean
2063     @ivar hyp_fail: whether the RPC call didn't return the instance list
2064     @type ghost: boolean
2065     @ivar ghost: whether this is a known node or not (config)
2066     @type os_fail: boolean
2067     @ivar os_fail: whether the RPC call didn't return valid OS data
2068     @type oslist: list
2069     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070     @type vm_capable: boolean
2071     @ivar vm_capable: whether the node can host instances
2072
2073     """
2074     def __init__(self, offline=False, name=None, vm_capable=True):
2075       self.name = name
2076       self.volumes = {}
2077       self.instances = []
2078       self.pinst = []
2079       self.sinst = []
2080       self.sbp = {}
2081       self.mfree = 0
2082       self.dfree = 0
2083       self.offline = offline
2084       self.vm_capable = vm_capable
2085       self.rpc_fail = False
2086       self.lvm_fail = False
2087       self.hyp_fail = False
2088       self.ghost = False
2089       self.os_fail = False
2090       self.oslist = {}
2091
2092   def ExpandNames(self):
2093     # This raises errors.OpPrereqError on its own:
2094     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2095
2096     # Get instances in node group; this is unsafe and needs verification later
2097     inst_names = \
2098       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2099
2100     self.needed_locks = {
2101       locking.LEVEL_INSTANCE: inst_names,
2102       locking.LEVEL_NODEGROUP: [self.group_uuid],
2103       locking.LEVEL_NODE: [],
2104       }
2105
2106     self.share_locks = _ShareAll()
2107
2108   def DeclareLocks(self, level):
2109     if level == locking.LEVEL_NODE:
2110       # Get members of node group; this is unsafe and needs verification later
2111       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2112
2113       all_inst_info = self.cfg.GetAllInstancesInfo()
2114
2115       # In Exec(), we warn about mirrored instances that have primary and
2116       # secondary living in separate node groups. To fully verify that
2117       # volumes for these instances are healthy, we will need to do an
2118       # extra call to their secondaries. We ensure here those nodes will
2119       # be locked.
2120       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121         # Important: access only the instances whose lock is owned
2122         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123           nodes.update(all_inst_info[inst].secondary_nodes)
2124
2125       self.needed_locks[locking.LEVEL_NODE] = nodes
2126
2127   def CheckPrereq(self):
2128     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2130
2131     group_nodes = set(self.group_info.members)
2132     group_instances = \
2133       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2134
2135     unlocked_nodes = \
2136         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2137
2138     unlocked_instances = \
2139         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2140
2141     if unlocked_nodes:
2142       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143                                  utils.CommaJoin(unlocked_nodes),
2144                                  errors.ECODE_STATE)
2145
2146     if unlocked_instances:
2147       raise errors.OpPrereqError("Missing lock for instances: %s" %
2148                                  utils.CommaJoin(unlocked_instances),
2149                                  errors.ECODE_STATE)
2150
2151     self.all_node_info = self.cfg.GetAllNodesInfo()
2152     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2153
2154     self.my_node_names = utils.NiceSort(group_nodes)
2155     self.my_inst_names = utils.NiceSort(group_instances)
2156
2157     self.my_node_info = dict((name, self.all_node_info[name])
2158                              for name in self.my_node_names)
2159
2160     self.my_inst_info = dict((name, self.all_inst_info[name])
2161                              for name in self.my_inst_names)
2162
2163     # We detect here the nodes that will need the extra RPC calls for verifying
2164     # split LV volumes; they should be locked.
2165     extra_lv_nodes = set()
2166
2167     for inst in self.my_inst_info.values():
2168       if inst.disk_template in constants.DTS_INT_MIRROR:
2169         for nname in inst.all_nodes:
2170           if self.all_node_info[nname].group != self.group_uuid:
2171             extra_lv_nodes.add(nname)
2172
2173     unlocked_lv_nodes = \
2174         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2175
2176     if unlocked_lv_nodes:
2177       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178                                  utils.CommaJoin(unlocked_lv_nodes),
2179                                  errors.ECODE_STATE)
2180     self.extra_lv_nodes = list(extra_lv_nodes)
2181
2182   def _VerifyNode(self, ninfo, nresult):
2183     """Perform some basic validation on data returned from a node.
2184
2185       - check the result data structure is well formed and has all the
2186         mandatory fields
2187       - check ganeti version
2188
2189     @type ninfo: L{objects.Node}
2190     @param ninfo: the node to check
2191     @param nresult: the results from the node
2192     @rtype: boolean
2193     @return: whether overall this call was successful (and we can expect
2194          reasonable values in the respose)
2195
2196     """
2197     node = ninfo.name
2198     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2199
2200     # main result, nresult should be a non-empty dict
2201     test = not nresult or not isinstance(nresult, dict)
2202     _ErrorIf(test, constants.CV_ENODERPC, node,
2203                   "unable to verify node: no data returned")
2204     if test:
2205       return False
2206
2207     # compares ganeti version
2208     local_version = constants.PROTOCOL_VERSION
2209     remote_version = nresult.get("version", None)
2210     test = not (remote_version and
2211                 isinstance(remote_version, (list, tuple)) and
2212                 len(remote_version) == 2)
2213     _ErrorIf(test, constants.CV_ENODERPC, node,
2214              "connection to node returned invalid data")
2215     if test:
2216       return False
2217
2218     test = local_version != remote_version[0]
2219     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220              "incompatible protocol versions: master %s,"
2221              " node %s", local_version, remote_version[0])
2222     if test:
2223       return False
2224
2225     # node seems compatible, we can actually try to look into its results
2226
2227     # full package version
2228     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229                   constants.CV_ENODEVERSION, node,
2230                   "software version mismatch: master %s, node %s",
2231                   constants.RELEASE_VERSION, remote_version[1],
2232                   code=self.ETYPE_WARNING)
2233
2234     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235     if ninfo.vm_capable and isinstance(hyp_result, dict):
2236       for hv_name, hv_result in hyp_result.iteritems():
2237         test = hv_result is not None
2238         _ErrorIf(test, constants.CV_ENODEHV, node,
2239                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2240
2241     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242     if ninfo.vm_capable and isinstance(hvp_result, list):
2243       for item, hv_name, hv_result in hvp_result:
2244         _ErrorIf(True, constants.CV_ENODEHV, node,
2245                  "hypervisor %s parameter verify failure (source %s): %s",
2246                  hv_name, item, hv_result)
2247
2248     test = nresult.get(constants.NV_NODESETUP,
2249                        ["Missing NODESETUP results"])
2250     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2251              "; ".join(test))
2252
2253     return True
2254
2255   def _VerifyNodeTime(self, ninfo, nresult,
2256                       nvinfo_starttime, nvinfo_endtime):
2257     """Check the node time.
2258
2259     @type ninfo: L{objects.Node}
2260     @param ninfo: the node to check
2261     @param nresult: the remote results for the node
2262     @param nvinfo_starttime: the start time of the RPC call
2263     @param nvinfo_endtime: the end time of the RPC call
2264
2265     """
2266     node = ninfo.name
2267     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268
2269     ntime = nresult.get(constants.NV_TIME, None)
2270     try:
2271       ntime_merged = utils.MergeTime(ntime)
2272     except (ValueError, TypeError):
2273       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2274       return
2275
2276     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2280     else:
2281       ntime_diff = None
2282
2283     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284              "Node time diverges by at least %s from master node time",
2285              ntime_diff)
2286
2287   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288     """Check the node LVM results.
2289
2290     @type ninfo: L{objects.Node}
2291     @param ninfo: the node to check
2292     @param nresult: the remote results for the node
2293     @param vg_name: the configured VG name
2294
2295     """
2296     if vg_name is None:
2297       return
2298
2299     node = ninfo.name
2300     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2301
2302     # checks vg existence and size > 20G
2303     vglist = nresult.get(constants.NV_VGLIST, None)
2304     test = not vglist
2305     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2306     if not test:
2307       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308                                             constants.MIN_VG_SIZE)
2309       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2310
2311     # check pv names
2312     pvlist = nresult.get(constants.NV_PVLIST, None)
2313     test = pvlist is None
2314     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2315     if not test:
2316       # check that ':' is not present in PV names, since it's a
2317       # special character for lvcreate (denotes the range of PEs to
2318       # use on the PV)
2319       for _, pvname, owner_vg in pvlist:
2320         test = ":" in pvname
2321         _ErrorIf(test, constants.CV_ENODELVM, node,
2322                  "Invalid character ':' in PV '%s' of VG '%s'",
2323                  pvname, owner_vg)
2324
2325   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326     """Check the node bridges.
2327
2328     @type ninfo: L{objects.Node}
2329     @param ninfo: the node to check
2330     @param nresult: the remote results for the node
2331     @param bridges: the expected list of bridges
2332
2333     """
2334     if not bridges:
2335       return
2336
2337     node = ninfo.name
2338     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2339
2340     missing = nresult.get(constants.NV_BRIDGES, None)
2341     test = not isinstance(missing, list)
2342     _ErrorIf(test, constants.CV_ENODENET, node,
2343              "did not return valid bridge information")
2344     if not test:
2345       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2347
2348   def _VerifyNodeUserScripts(self, ninfo, nresult):
2349     """Check the results of user scripts presence and executability on the node
2350
2351     @type ninfo: L{objects.Node}
2352     @param ninfo: the node to check
2353     @param nresult: the remote results for the node
2354
2355     """
2356     node = ninfo.name
2357
2358     test = not constants.NV_USERSCRIPTS in nresult
2359     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360                   "did not return user scripts information")
2361
2362     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2363     if not test:
2364       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365                     "user scripts not present or not executable: %s" %
2366                     utils.CommaJoin(sorted(broken_scripts)))
2367
2368   def _VerifyNodeNetwork(self, ninfo, nresult):
2369     """Check the node network connectivity results.
2370
2371     @type ninfo: L{objects.Node}
2372     @param ninfo: the node to check
2373     @param nresult: the remote results for the node
2374
2375     """
2376     node = ninfo.name
2377     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2378
2379     test = constants.NV_NODELIST not in nresult
2380     _ErrorIf(test, constants.CV_ENODESSH, node,
2381              "node hasn't returned node ssh connectivity data")
2382     if not test:
2383       if nresult[constants.NV_NODELIST]:
2384         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385           _ErrorIf(True, constants.CV_ENODESSH, node,
2386                    "ssh communication with node '%s': %s", a_node, a_msg)
2387
2388     test = constants.NV_NODENETTEST not in nresult
2389     _ErrorIf(test, constants.CV_ENODENET, node,
2390              "node hasn't returned node tcp connectivity data")
2391     if not test:
2392       if nresult[constants.NV_NODENETTEST]:
2393         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2394         for anode in nlist:
2395           _ErrorIf(True, constants.CV_ENODENET, node,
2396                    "tcp communication with node '%s': %s",
2397                    anode, nresult[constants.NV_NODENETTEST][anode])
2398
2399     test = constants.NV_MASTERIP not in nresult
2400     _ErrorIf(test, constants.CV_ENODENET, node,
2401              "node hasn't returned node master IP reachability data")
2402     if not test:
2403       if not nresult[constants.NV_MASTERIP]:
2404         if node == self.master_node:
2405           msg = "the master node cannot reach the master IP (not configured?)"
2406         else:
2407           msg = "cannot reach the master IP"
2408         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2409
2410   def _VerifyInstance(self, instance, instanceconfig, node_image,
2411                       diskstatus):
2412     """Verify an instance.
2413
2414     This function checks to see if the required block devices are
2415     available on the instance's node.
2416
2417     """
2418     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419     node_current = instanceconfig.primary_node
2420
2421     node_vol_should = {}
2422     instanceconfig.MapLVsByNode(node_vol_should)
2423
2424     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2427
2428     for node in node_vol_should:
2429       n_img = node_image[node]
2430       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431         # ignore missing volumes on offline or broken nodes
2432         continue
2433       for volume in node_vol_should[node]:
2434         test = volume not in n_img.volumes
2435         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436                  "volume %s missing on node %s", volume, node)
2437
2438     if instanceconfig.admin_state == constants.ADMINST_UP:
2439       pri_img = node_image[node_current]
2440       test = instance not in pri_img.instances and not pri_img.offline
2441       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442                "instance not running on its primary node %s",
2443                node_current)
2444
2445     diskdata = [(nname, success, status, idx)
2446                 for (nname, disks) in diskstatus.items()
2447                 for idx, (success, status) in enumerate(disks)]
2448
2449     for nname, success, bdev_status, idx in diskdata:
2450       # the 'ghost node' construction in Exec() ensures that we have a
2451       # node here
2452       snode = node_image[nname]
2453       bad_snode = snode.ghost or snode.offline
2454       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455                not success and not bad_snode,
2456                constants.CV_EINSTANCEFAULTYDISK, instance,
2457                "couldn't retrieve status for disk/%s on %s: %s",
2458                idx, nname, bdev_status)
2459       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461                constants.CV_EINSTANCEFAULTYDISK, instance,
2462                "disk/%s on %s is faulty", idx, nname)
2463
2464   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465     """Verify if there are any unknown volumes in the cluster.
2466
2467     The .os, .swap and backup volumes are ignored. All other volumes are
2468     reported as unknown.
2469
2470     @type reserved: L{ganeti.utils.FieldSet}
2471     @param reserved: a FieldSet of reserved volume names
2472
2473     """
2474     for node, n_img in node_image.items():
2475       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476           self.all_node_info[node].group != self.group_uuid):
2477         # skip non-healthy nodes
2478         continue
2479       for volume in n_img.volumes:
2480         test = ((node not in node_vol_should or
2481                 volume not in node_vol_should[node]) and
2482                 not reserved.Matches(volume))
2483         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484                       "volume %s is unknown", volume)
2485
2486   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487     """Verify N+1 Memory Resilience.
2488
2489     Check that if one single node dies we can still start all the
2490     instances it was primary for.
2491
2492     """
2493     cluster_info = self.cfg.GetClusterInfo()
2494     for node, n_img in node_image.items():
2495       # This code checks that every node which is now listed as
2496       # secondary has enough memory to host all instances it is
2497       # supposed to should a single other node in the cluster fail.
2498       # FIXME: not ready for failover to an arbitrary node
2499       # FIXME: does not support file-backed instances
2500       # WARNING: we currently take into account down instances as well
2501       # as up ones, considering that even if they're down someone
2502       # might want to start them even in the event of a node failure.
2503       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504         # we're skipping nodes marked offline and nodes in other groups from
2505         # the N+1 warning, since most likely we don't have good memory
2506         # infromation from them; we already list instances living on such
2507         # nodes, and that's enough warning
2508         continue
2509       #TODO(dynmem): also consider ballooning out other instances
2510       for prinode, instances in n_img.sbp.items():
2511         needed_mem = 0
2512         for instance in instances:
2513           bep = cluster_info.FillBE(instance_cfg[instance])
2514           if bep[constants.BE_AUTO_BALANCE]:
2515             needed_mem += bep[constants.BE_MINMEM]
2516         test = n_img.mfree < needed_mem
2517         self._ErrorIf(test, constants.CV_ENODEN1, node,
2518                       "not enough memory to accomodate instance failovers"
2519                       " should node %s fail (%dMiB needed, %dMiB available)",
2520                       prinode, needed_mem, n_img.mfree)
2521
2522   @classmethod
2523   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524                    (files_all, files_opt, files_mc, files_vm)):
2525     """Verifies file checksums collected from all nodes.
2526
2527     @param errorif: Callback for reporting errors
2528     @param nodeinfo: List of L{objects.Node} objects
2529     @param master_node: Name of master node
2530     @param all_nvinfo: RPC results
2531
2532     """
2533     # Define functions determining which nodes to consider for a file
2534     files2nodefn = [
2535       (files_all, None),
2536       (files_mc, lambda node: (node.master_candidate or
2537                                node.name == master_node)),
2538       (files_vm, lambda node: node.vm_capable),
2539       ]
2540
2541     # Build mapping from filename to list of nodes which should have the file
2542     nodefiles = {}
2543     for (files, fn) in files2nodefn:
2544       if fn is None:
2545         filenodes = nodeinfo
2546       else:
2547         filenodes = filter(fn, nodeinfo)
2548       nodefiles.update((filename,
2549                         frozenset(map(operator.attrgetter("name"), filenodes)))
2550                        for filename in files)
2551
2552     assert set(nodefiles) == (files_all | files_mc | files_vm)
2553
2554     fileinfo = dict((filename, {}) for filename in nodefiles)
2555     ignore_nodes = set()
2556
2557     for node in nodeinfo:
2558       if node.offline:
2559         ignore_nodes.add(node.name)
2560         continue
2561
2562       nresult = all_nvinfo[node.name]
2563
2564       if nresult.fail_msg or not nresult.payload:
2565         node_files = None
2566       else:
2567         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2568
2569       test = not (node_files and isinstance(node_files, dict))
2570       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571               "Node did not return file checksum data")
2572       if test:
2573         ignore_nodes.add(node.name)
2574         continue
2575
2576       # Build per-checksum mapping from filename to nodes having it
2577       for (filename, checksum) in node_files.items():
2578         assert filename in nodefiles
2579         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2580
2581     for (filename, checksums) in fileinfo.items():
2582       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2583
2584       # Nodes having the file
2585       with_file = frozenset(node_name
2586                             for nodes in fileinfo[filename].values()
2587                             for node_name in nodes) - ignore_nodes
2588
2589       expected_nodes = nodefiles[filename] - ignore_nodes
2590
2591       # Nodes missing file
2592       missing_file = expected_nodes - with_file
2593
2594       if filename in files_opt:
2595         # All or no nodes
2596         errorif(missing_file and missing_file != expected_nodes,
2597                 constants.CV_ECLUSTERFILECHECK, None,
2598                 "File %s is optional, but it must exist on all or no"
2599                 " nodes (not found on %s)",
2600                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2601       else:
2602         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603                 "File %s is missing from node(s) %s", filename,
2604                 utils.CommaJoin(utils.NiceSort(missing_file)))
2605
2606         # Warn if a node has a file it shouldn't
2607         unexpected = with_file - expected_nodes
2608         errorif(unexpected,
2609                 constants.CV_ECLUSTERFILECHECK, None,
2610                 "File %s should not exist on node(s) %s",
2611                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2612
2613       # See if there are multiple versions of the file
2614       test = len(checksums) > 1
2615       if test:
2616         variants = ["variant %s on %s" %
2617                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618                     for (idx, (checksum, nodes)) in
2619                       enumerate(sorted(checksums.items()))]
2620       else:
2621         variants = []
2622
2623       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624               "File %s found with %s different checksums (%s)",
2625               filename, len(checksums), "; ".join(variants))
2626
2627   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2628                       drbd_map):
2629     """Verifies and the node DRBD status.
2630
2631     @type ninfo: L{objects.Node}
2632     @param ninfo: the node to check
2633     @param nresult: the remote results for the node
2634     @param instanceinfo: the dict of instances
2635     @param drbd_helper: the configured DRBD usermode helper
2636     @param drbd_map: the DRBD map as returned by
2637         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2638
2639     """
2640     node = ninfo.name
2641     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2642
2643     if drbd_helper:
2644       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645       test = (helper_result == None)
2646       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647                "no drbd usermode helper returned")
2648       if helper_result:
2649         status, payload = helper_result
2650         test = not status
2651         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652                  "drbd usermode helper check unsuccessful: %s", payload)
2653         test = status and (payload != drbd_helper)
2654         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655                  "wrong drbd usermode helper: %s", payload)
2656
2657     # compute the DRBD minors
2658     node_drbd = {}
2659     for minor, instance in drbd_map[node].items():
2660       test = instance not in instanceinfo
2661       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662                "ghost instance '%s' in temporary DRBD map", instance)
2663         # ghost instance should not be running, but otherwise we
2664         # don't give double warnings (both ghost instance and
2665         # unallocated minor in use)
2666       if test:
2667         node_drbd[minor] = (instance, False)
2668       else:
2669         instance = instanceinfo[instance]
2670         node_drbd[minor] = (instance.name,
2671                             instance.admin_state == constants.ADMINST_UP)
2672
2673     # and now check them
2674     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675     test = not isinstance(used_minors, (tuple, list))
2676     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677              "cannot parse drbd status file: %s", str(used_minors))
2678     if test:
2679       # we cannot check drbd status
2680       return
2681
2682     for minor, (iname, must_exist) in node_drbd.items():
2683       test = minor not in used_minors and must_exist
2684       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685                "drbd minor %d of instance %s is not active", minor, iname)
2686     for minor in used_minors:
2687       test = minor not in node_drbd
2688       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689                "unallocated drbd minor %d is in use", minor)
2690
2691   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692     """Builds the node OS structures.
2693
2694     @type ninfo: L{objects.Node}
2695     @param ninfo: the node to check
2696     @param nresult: the remote results for the node
2697     @param nimg: the node image object
2698
2699     """
2700     node = ninfo.name
2701     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2702
2703     remote_os = nresult.get(constants.NV_OSLIST, None)
2704     test = (not isinstance(remote_os, list) or
2705             not compat.all(isinstance(v, list) and len(v) == 7
2706                            for v in remote_os))
2707
2708     _ErrorIf(test, constants.CV_ENODEOS, node,
2709              "node hasn't returned valid OS data")
2710
2711     nimg.os_fail = test
2712
2713     if test:
2714       return
2715
2716     os_dict = {}
2717
2718     for (name, os_path, status, diagnose,
2719          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2720
2721       if name not in os_dict:
2722         os_dict[name] = []
2723
2724       # parameters is a list of lists instead of list of tuples due to
2725       # JSON lacking a real tuple type, fix it:
2726       parameters = [tuple(v) for v in parameters]
2727       os_dict[name].append((os_path, status, diagnose,
2728                             set(variants), set(parameters), set(api_ver)))
2729
2730     nimg.oslist = os_dict
2731
2732   def _VerifyNodeOS(self, ninfo, nimg, base):
2733     """Verifies the node OS list.
2734
2735     @type ninfo: L{objects.Node}
2736     @param ninfo: the node to check
2737     @param nimg: the node image object
2738     @param base: the 'template' node we match against (e.g. from the master)
2739
2740     """
2741     node = ninfo.name
2742     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2743
2744     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2745
2746     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747     for os_name, os_data in nimg.oslist.items():
2748       assert os_data, "Empty OS status for OS %s?!" % os_name
2749       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753                "OS '%s' has multiple entries (first one shadows the rest): %s",
2754                os_name, utils.CommaJoin([v[0] for v in os_data]))
2755       # comparisons with the 'base' image
2756       test = os_name not in base.oslist
2757       _ErrorIf(test, constants.CV_ENODEOS, node,
2758                "Extra OS %s not present on reference node (%s)",
2759                os_name, base.name)
2760       if test:
2761         continue
2762       assert base.oslist[os_name], "Base node has empty OS status?"
2763       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2764       if not b_status:
2765         # base OS is invalid, skipping
2766         continue
2767       for kind, a, b in [("API version", f_api, b_api),
2768                          ("variants list", f_var, b_var),
2769                          ("parameters", beautify_params(f_param),
2770                           beautify_params(b_param))]:
2771         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773                  kind, os_name, base.name,
2774                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2775
2776     # check any missing OSes
2777     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778     _ErrorIf(missing, constants.CV_ENODEOS, node,
2779              "OSes present on reference node %s but missing on this node: %s",
2780              base.name, utils.CommaJoin(missing))
2781
2782   def _VerifyOob(self, ninfo, nresult):
2783     """Verifies out of band functionality of a node.
2784
2785     @type ninfo: L{objects.Node}
2786     @param ninfo: the node to check
2787     @param nresult: the remote results for the node
2788
2789     """
2790     node = ninfo.name
2791     # We just have to verify the paths on master and/or master candidates
2792     # as the oob helper is invoked on the master
2793     if ((ninfo.master_candidate or ninfo.master_capable) and
2794         constants.NV_OOB_PATHS in nresult):
2795       for path_result in nresult[constants.NV_OOB_PATHS]:
2796         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2797
2798   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799     """Verifies and updates the node volume data.
2800
2801     This function will update a L{NodeImage}'s internal structures
2802     with data from the remote call.
2803
2804     @type ninfo: L{objects.Node}
2805     @param ninfo: the node to check
2806     @param nresult: the remote results for the node
2807     @param nimg: the node image object
2808     @param vg_name: the configured VG name
2809
2810     """
2811     node = ninfo.name
2812     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2813
2814     nimg.lvm_fail = True
2815     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2816     if vg_name is None:
2817       pass
2818     elif isinstance(lvdata, basestring):
2819       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820                utils.SafeEncode(lvdata))
2821     elif not isinstance(lvdata, dict):
2822       _ErrorIf(True, constants.CV_ENODELVM, node,
2823                "rpc call to node failed (lvlist)")
2824     else:
2825       nimg.volumes = lvdata
2826       nimg.lvm_fail = False
2827
2828   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829     """Verifies and updates the node instance list.
2830
2831     If the listing was successful, then updates this node's instance
2832     list. Otherwise, it marks the RPC call as failed for the instance
2833     list key.
2834
2835     @type ninfo: L{objects.Node}
2836     @param ninfo: the node to check
2837     @param nresult: the remote results for the node
2838     @param nimg: the node image object
2839
2840     """
2841     idata = nresult.get(constants.NV_INSTANCELIST, None)
2842     test = not isinstance(idata, list)
2843     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844                   "rpc call to node failed (instancelist): %s",
2845                   utils.SafeEncode(str(idata)))
2846     if test:
2847       nimg.hyp_fail = True
2848     else:
2849       nimg.instances = idata
2850
2851   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852     """Verifies and computes a node information map
2853
2854     @type ninfo: L{objects.Node}
2855     @param ninfo: the node to check
2856     @param nresult: the remote results for the node
2857     @param nimg: the node image object
2858     @param vg_name: the configured VG name
2859
2860     """
2861     node = ninfo.name
2862     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2863
2864     # try to read free memory (from the hypervisor)
2865     hv_info = nresult.get(constants.NV_HVINFO, None)
2866     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867     _ErrorIf(test, constants.CV_ENODEHV, node,
2868              "rpc call to node failed (hvinfo)")
2869     if not test:
2870       try:
2871         nimg.mfree = int(hv_info["memory_free"])
2872       except (ValueError, TypeError):
2873         _ErrorIf(True, constants.CV_ENODERPC, node,
2874                  "node returned invalid nodeinfo, check hypervisor")
2875
2876     # FIXME: devise a free space model for file based instances as well
2877     if vg_name is not None:
2878       test = (constants.NV_VGLIST not in nresult or
2879               vg_name not in nresult[constants.NV_VGLIST])
2880       _ErrorIf(test, constants.CV_ENODELVM, node,
2881                "node didn't return data for the volume group '%s'"
2882                " - it is either missing or broken", vg_name)
2883       if not test:
2884         try:
2885           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886         except (ValueError, TypeError):
2887           _ErrorIf(True, constants.CV_ENODERPC, node,
2888                    "node returned invalid LVM info, check LVM status")
2889
2890   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891     """Gets per-disk status information for all instances.
2892
2893     @type nodelist: list of strings
2894     @param nodelist: Node names
2895     @type node_image: dict of (name, L{objects.Node})
2896     @param node_image: Node objects
2897     @type instanceinfo: dict of (name, L{objects.Instance})
2898     @param instanceinfo: Instance objects
2899     @rtype: {instance: {node: [(succes, payload)]}}
2900     @return: a dictionary of per-instance dictionaries with nodes as
2901         keys and disk information as values; the disk information is a
2902         list of tuples (success, payload)
2903
2904     """
2905     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2906
2907     node_disks = {}
2908     node_disks_devonly = {}
2909     diskless_instances = set()
2910     diskless = constants.DT_DISKLESS
2911
2912     for nname in nodelist:
2913       node_instances = list(itertools.chain(node_image[nname].pinst,
2914                                             node_image[nname].sinst))
2915       diskless_instances.update(inst for inst in node_instances
2916                                 if instanceinfo[inst].disk_template == diskless)
2917       disks = [(inst, disk)
2918                for inst in node_instances
2919                for disk in instanceinfo[inst].disks]
2920
2921       if not disks:
2922         # No need to collect data
2923         continue
2924
2925       node_disks[nname] = disks
2926
2927       # _AnnotateDiskParams makes already copies of the disks
2928       devonly = []
2929       for (inst, dev) in disks:
2930         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931         self.cfg.SetDiskID(anno_disk, nname)
2932         devonly.append(anno_disk)
2933
2934       node_disks_devonly[nname] = devonly
2935
2936     assert len(node_disks) == len(node_disks_devonly)
2937
2938     # Collect data from all nodes with disks
2939     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2940                                                           node_disks_devonly)
2941
2942     assert len(result) == len(node_disks)
2943
2944     instdisk = {}
2945
2946     for (nname, nres) in result.items():
2947       disks = node_disks[nname]
2948
2949       if nres.offline:
2950         # No data from this node
2951         data = len(disks) * [(False, "node offline")]
2952       else:
2953         msg = nres.fail_msg
2954         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955                  "while getting disk information: %s", msg)
2956         if msg:
2957           # No data from this node
2958           data = len(disks) * [(False, msg)]
2959         else:
2960           data = []
2961           for idx, i in enumerate(nres.payload):
2962             if isinstance(i, (tuple, list)) and len(i) == 2:
2963               data.append(i)
2964             else:
2965               logging.warning("Invalid result from node %s, entry %d: %s",
2966                               nname, idx, i)
2967               data.append((False, "Invalid result from the remote node"))
2968
2969       for ((inst, _), status) in zip(disks, data):
2970         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2971
2972     # Add empty entries for diskless instances.
2973     for inst in diskless_instances:
2974       assert inst not in instdisk
2975       instdisk[inst] = {}
2976
2977     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979                       compat.all(isinstance(s, (tuple, list)) and
2980                                  len(s) == 2 for s in statuses)
2981                       for inst, nnames in instdisk.items()
2982                       for nname, statuses in nnames.items())
2983     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2984
2985     return instdisk
2986
2987   @staticmethod
2988   def _SshNodeSelector(group_uuid, all_nodes):
2989     """Create endless iterators for all potential SSH check hosts.
2990
2991     """
2992     nodes = [node for node in all_nodes
2993              if (node.group != group_uuid and
2994                  not node.offline)]
2995     keyfunc = operator.attrgetter("group")
2996
2997     return map(itertools.cycle,
2998                [sorted(map(operator.attrgetter("name"), names))
2999                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3000                                                   keyfunc)])
3001
3002   @classmethod
3003   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004     """Choose which nodes should talk to which other nodes.
3005
3006     We will make nodes contact all nodes in their group, and one node from
3007     every other group.
3008
3009     @warning: This algorithm has a known issue if one node group is much
3010       smaller than others (e.g. just one node). In such a case all other
3011       nodes will talk to the single node.
3012
3013     """
3014     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3016
3017     return (online_nodes,
3018             dict((name, sorted([i.next() for i in sel]))
3019                  for name in online_nodes))
3020
3021   def BuildHooksEnv(self):
3022     """Build hooks env.
3023
3024     Cluster-Verify hooks just ran in the post phase and their failure makes
3025     the output be logged in the verify output and the verification to fail.
3026
3027     """
3028     env = {
3029       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3030       }
3031
3032     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033                for node in self.my_node_info.values())
3034
3035     return env
3036
3037   def BuildHooksNodes(self):
3038     """Build hooks nodes.
3039
3040     """
3041     return ([], self.my_node_names)
3042
3043   def Exec(self, feedback_fn):
3044     """Verify integrity of the node group, performing various test on nodes.
3045
3046     """
3047     # This method has too many local variables. pylint: disable=R0914
3048     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3049
3050     if not self.my_node_names:
3051       # empty node group
3052       feedback_fn("* Empty node group, skipping verification")
3053       return True
3054
3055     self.bad = False
3056     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057     verbose = self.op.verbose
3058     self._feedback_fn = feedback_fn
3059
3060     vg_name = self.cfg.GetVGName()
3061     drbd_helper = self.cfg.GetDRBDHelper()
3062     cluster = self.cfg.GetClusterInfo()
3063     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064     hypervisors = cluster.enabled_hypervisors
3065     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3066
3067     i_non_redundant = [] # Non redundant instances
3068     i_non_a_balanced = [] # Non auto-balanced instances
3069     i_offline = 0 # Count of offline instances
3070     n_offline = 0 # Count of offline nodes
3071     n_drained = 0 # Count of nodes being drained
3072     node_vol_should = {}
3073
3074     # FIXME: verify OS list
3075
3076     # File verification
3077     filemap = _ComputeAncillaryFiles(cluster, False)
3078
3079     # do local checksums
3080     master_node = self.master_node = self.cfg.GetMasterNode()
3081     master_ip = self.cfg.GetMasterIP()
3082
3083     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3084
3085     user_scripts = []
3086     if self.cfg.GetUseExternalMipScript():
3087       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3088
3089     node_verify_param = {
3090       constants.NV_FILELIST:
3091         utils.UniqueSequence(filename
3092                              for files in filemap
3093                              for filename in files),
3094       constants.NV_NODELIST:
3095         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096                                   self.all_node_info.values()),
3097       constants.NV_HYPERVISOR: hypervisors,
3098       constants.NV_HVPARAMS:
3099         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101                                  for node in node_data_list
3102                                  if not node.offline],
3103       constants.NV_INSTANCELIST: hypervisors,
3104       constants.NV_VERSION: None,
3105       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106       constants.NV_NODESETUP: None,
3107       constants.NV_TIME: None,
3108       constants.NV_MASTERIP: (master_node, master_ip),
3109       constants.NV_OSLIST: None,
3110       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111       constants.NV_USERSCRIPTS: user_scripts,
3112       }
3113
3114     if vg_name is not None:
3115       node_verify_param[constants.NV_VGLIST] = None
3116       node_verify_param[constants.NV_LVLIST] = vg_name
3117       node_verify_param[constants.NV_PVLIST] = [vg_name]
3118       node_verify_param[constants.NV_DRBDLIST] = None
3119
3120     if drbd_helper:
3121       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3122
3123     # bridge checks
3124     # FIXME: this needs to be changed per node-group, not cluster-wide
3125     bridges = set()
3126     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128       bridges.add(default_nicpp[constants.NIC_LINK])
3129     for instance in self.my_inst_info.values():
3130       for nic in instance.nics:
3131         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133           bridges.add(full_nic[constants.NIC_LINK])
3134
3135     if bridges:
3136       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3137
3138     # Build our expected cluster state
3139     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3140                                                  name=node.name,
3141                                                  vm_capable=node.vm_capable))
3142                       for node in node_data_list)
3143
3144     # Gather OOB paths
3145     oob_paths = []
3146     for node in self.all_node_info.values():
3147       path = _SupportsOob(self.cfg, node)
3148       if path and path not in oob_paths:
3149         oob_paths.append(path)
3150
3151     if oob_paths:
3152       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3153
3154     for instance in self.my_inst_names:
3155       inst_config = self.my_inst_info[instance]
3156       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3157         i_offline += 1
3158
3159       for nname in inst_config.all_nodes:
3160         if nname not in node_image:
3161           gnode = self.NodeImage(name=nname)
3162           gnode.ghost = (nname not in self.all_node_info)
3163           node_image[nname] = gnode
3164
3165       inst_config.MapLVsByNode(node_vol_should)
3166
3167       pnode = inst_config.primary_node
3168       node_image[pnode].pinst.append(instance)
3169
3170       for snode in inst_config.secondary_nodes:
3171         nimg = node_image[snode]
3172         nimg.sinst.append(instance)
3173         if pnode not in nimg.sbp:
3174           nimg.sbp[pnode] = []
3175         nimg.sbp[pnode].append(instance)
3176
3177     # At this point, we have the in-memory data structures complete,
3178     # except for the runtime information, which we'll gather next
3179
3180     # Due to the way our RPC system works, exact response times cannot be
3181     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182     # time before and after executing the request, we can at least have a time
3183     # window.
3184     nvinfo_starttime = time.time()
3185     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3186                                            node_verify_param,
3187                                            self.cfg.GetClusterName())
3188     nvinfo_endtime = time.time()
3189
3190     if self.extra_lv_nodes and vg_name is not None:
3191       extra_lv_nvinfo = \
3192           self.rpc.call_node_verify(self.extra_lv_nodes,
3193                                     {constants.NV_LVLIST: vg_name},
3194                                     self.cfg.GetClusterName())
3195     else:
3196       extra_lv_nvinfo = {}
3197
3198     all_drbd_map = self.cfg.ComputeDRBDMap()
3199
3200     feedback_fn("* Gathering disk information (%s nodes)" %
3201                 len(self.my_node_names))
3202     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3203                                      self.my_inst_info)
3204
3205     feedback_fn("* Verifying configuration file consistency")
3206
3207     # If not all nodes are being checked, we need to make sure the master node
3208     # and a non-checked vm_capable node are in the list.
3209     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3210     if absent_nodes:
3211       vf_nvinfo = all_nvinfo.copy()
3212       vf_node_info = list(self.my_node_info.values())
3213       additional_nodes = []
3214       if master_node not in self.my_node_info:
3215         additional_nodes.append(master_node)
3216         vf_node_info.append(self.all_node_info[master_node])
3217       # Add the first vm_capable node we find which is not included
3218       for node in absent_nodes:
3219         nodeinfo = self.all_node_info[node]
3220         if nodeinfo.vm_capable and not nodeinfo.offline:
3221           additional_nodes.append(node)
3222           vf_node_info.append(self.all_node_info[node])
3223           break
3224       key = constants.NV_FILELIST
3225       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3226                                                  {key: node_verify_param[key]},
3227                                                  self.cfg.GetClusterName()))
3228     else:
3229       vf_nvinfo = all_nvinfo
3230       vf_node_info = self.my_node_info.values()
3231
3232     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3233
3234     feedback_fn("* Verifying node status")
3235
3236     refos_img = None
3237
3238     for node_i in node_data_list:
3239       node = node_i.name
3240       nimg = node_image[node]
3241
3242       if node_i.offline:
3243         if verbose:
3244           feedback_fn("* Skipping offline node %s" % (node,))
3245         n_offline += 1
3246         continue
3247
3248       if node == master_node:
3249         ntype = "master"
3250       elif node_i.master_candidate:
3251         ntype = "master candidate"
3252       elif node_i.drained:
3253         ntype = "drained"
3254         n_drained += 1
3255       else:
3256         ntype = "regular"
3257       if verbose:
3258         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3259
3260       msg = all_nvinfo[node].fail_msg
3261       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3262                msg)
3263       if msg:
3264         nimg.rpc_fail = True
3265         continue
3266
3267       nresult = all_nvinfo[node].payload
3268
3269       nimg.call_ok = self._VerifyNode(node_i, nresult)
3270       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3271       self._VerifyNodeNetwork(node_i, nresult)
3272       self._VerifyNodeUserScripts(node_i, nresult)
3273       self._VerifyOob(node_i, nresult)
3274
3275       if nimg.vm_capable:
3276         self._VerifyNodeLVM(node_i, nresult, vg_name)
3277         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3278                              all_drbd_map)
3279
3280         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3281         self._UpdateNodeInstances(node_i, nresult, nimg)
3282         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3283         self._UpdateNodeOS(node_i, nresult, nimg)
3284
3285         if not nimg.os_fail:
3286           if refos_img is None:
3287             refos_img = nimg
3288           self._VerifyNodeOS(node_i, nimg, refos_img)
3289         self._VerifyNodeBridges(node_i, nresult, bridges)
3290
3291         # Check whether all running instancies are primary for the node. (This
3292         # can no longer be done from _VerifyInstance below, since some of the
3293         # wrong instances could be from other node groups.)
3294         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3295
3296         for inst in non_primary_inst:
3297           test = inst in self.all_inst_info
3298           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3299                    "instance should not run on node %s", node_i.name)
3300           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3301                    "node is running unknown instance %s", inst)
3302
3303     for node, result in extra_lv_nvinfo.items():
3304       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3305                               node_image[node], vg_name)
3306
3307     feedback_fn("* Verifying instance status")
3308     for instance in self.my_inst_names:
3309       if verbose:
3310         feedback_fn("* Verifying instance %s" % instance)
3311       inst_config = self.my_inst_info[instance]
3312       self._VerifyInstance(instance, inst_config, node_image,
3313                            instdisk[instance])
3314       inst_nodes_offline = []
3315
3316       pnode = inst_config.primary_node
3317       pnode_img = node_image[pnode]
3318       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3319                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3320                " primary node failed", instance)
3321
3322       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3323                pnode_img.offline,
3324                constants.CV_EINSTANCEBADNODE, instance,
3325                "instance is marked as running and lives on offline node %s",
3326                inst_config.primary_node)
3327
3328       # If the instance is non-redundant we cannot survive losing its primary
3329       # node, so we are not N+1 compliant. On the other hand we have no disk
3330       # templates with more than one secondary so that situation is not well
3331       # supported either.
3332       # FIXME: does not support file-backed instances
3333       if not inst_config.secondary_nodes:
3334         i_non_redundant.append(instance)
3335
3336       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3337                constants.CV_EINSTANCELAYOUT,
3338                instance, "instance has multiple secondary nodes: %s",
3339                utils.CommaJoin(inst_config.secondary_nodes),
3340                code=self.ETYPE_WARNING)
3341
3342       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3343         pnode = inst_config.primary_node
3344         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3345         instance_groups = {}
3346
3347         for node in instance_nodes:
3348           instance_groups.setdefault(self.all_node_info[node].group,
3349                                      []).append(node)
3350
3351         pretty_list = [
3352           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3353           # Sort so that we always list the primary node first.
3354           for group, nodes in sorted(instance_groups.items(),
3355                                      key=lambda (_, nodes): pnode in nodes,
3356                                      reverse=True)]
3357
3358         self._ErrorIf(len(instance_groups) > 1,
3359                       constants.CV_EINSTANCESPLITGROUPS,
3360                       instance, "instance has primary and secondary nodes in"
3361                       " different groups: %s", utils.CommaJoin(pretty_list),
3362                       code=self.ETYPE_WARNING)
3363
3364       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3365         i_non_a_balanced.append(instance)
3366
3367       for snode in inst_config.secondary_nodes:
3368         s_img = node_image[snode]
3369         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3370                  snode, "instance %s, connection to secondary node failed",
3371                  instance)
3372
3373         if s_img.offline:
3374           inst_nodes_offline.append(snode)
3375
3376       # warn that the instance lives on offline nodes
3377       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3378                "instance has offline secondary node(s) %s",
3379                utils.CommaJoin(inst_nodes_offline))
3380       # ... or ghost/non-vm_capable nodes
3381       for node in inst_config.all_nodes:
3382         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3383                  instance, "instance lives on ghost node %s", node)
3384         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3385                  instance, "instance lives on non-vm_capable node %s", node)
3386
3387     feedback_fn("* Verifying orphan volumes")
3388     reserved = utils.FieldSet(*cluster.reserved_lvs)
3389
3390     # We will get spurious "unknown volume" warnings if any node of this group
3391     # is secondary for an instance whose primary is in another group. To avoid
3392     # them, we find these instances and add their volumes to node_vol_should.
3393     for inst in self.all_inst_info.values():
3394       for secondary in inst.secondary_nodes:
3395         if (secondary in self.my_node_info
3396             and inst.name not in self.my_inst_info):
3397           inst.MapLVsByNode(node_vol_should)
3398           break
3399
3400     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3401
3402     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3403       feedback_fn("* Verifying N+1 Memory redundancy")
3404       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3405
3406     feedback_fn("* Other Notes")
3407     if i_non_redundant:
3408       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3409                   % len(i_non_redundant))
3410
3411     if i_non_a_balanced:
3412       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3413                   % len(i_non_a_balanced))
3414
3415     if i_offline:
3416       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3417
3418     if n_offline:
3419       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3420
3421     if n_drained:
3422       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3423
3424     return not self.bad
3425
3426   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3427     """Analyze the post-hooks' result
3428
3429     This method analyses the hook result, handles it, and sends some
3430     nicely-formatted feedback back to the user.
3431
3432     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3433         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3434     @param hooks_results: the results of the multi-node hooks rpc call
3435     @param feedback_fn: function used send feedback back to the caller
3436     @param lu_result: previous Exec result
3437     @return: the new Exec result, based on the previous result
3438         and hook results
3439
3440     """
3441     # We only really run POST phase hooks, only for non-empty groups,
3442     # and are only interested in their results
3443     if not self.my_node_names:
3444       # empty node group
3445       pass
3446     elif phase == constants.HOOKS_PHASE_POST:
3447       # Used to change hooks' output to proper indentation
3448       feedback_fn("* Hooks Results")
3449       assert hooks_results, "invalid result from hooks"
3450
3451       for node_name in hooks_results:
3452         res = hooks_results[node_name]
3453         msg = res.fail_msg
3454         test = msg and not res.offline
3455         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3456                       "Communication failure in hooks execution: %s", msg)
3457         if res.offline or msg:
3458           # No need to investigate payload if node is offline or gave
3459           # an error.
3460           continue
3461         for script, hkr, output in res.payload:
3462           test = hkr == constants.HKR_FAIL
3463           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464                         "Script %s failed, output:", script)
3465           if test:
3466             output = self._HOOKS_INDENT_RE.sub("      ", output)
3467             feedback_fn("%s" % output)
3468             lu_result = False
3469
3470     return lu_result
3471
3472
class LUClusterVerifyDisks(NoHooksLU):
  """Fans the disk verification out to one job per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all node groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submit one L{opcodes.OpGroupVerifyDisks} job per owned node group.

    @param feedback_fn: function used to send feedback back to the caller
        (not used here)
    @return: a L{ResultWithJobs} holding one single-opcode job per group

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # each job consists of exactly one opcode
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3491
3492
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  Locks are declared optimistically (instances first, then node groups,
  then nodes) and re-validated in L{CheckPrereq} once they are all held.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and prepare the (still empty) lock lists.

    The actual lock names are filled in level by level in L{DeclareLocks}.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: the locking level being declared; only
        L{locking.LEVEL_INSTANCE}, L{locking.LEVEL_NODEGROUP} and
        L{locking.LEVEL_NODE} are handled, other levels are ignored

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # The group being verified is always locked, plus every group reported
      # for the instances locked above
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Re-validate the optimistically acquired locks.

    Also loads the configuration of all owned instances into
    C{self.instances} (a dict built from the pairs returned by
    C{GetMultiInstanceInfo}).

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose C{admin_state} is L{constants.ADMINST_UP} are
    taken into account.

    @param feedback_fn: function used to send feedback back to the caller
        (not used here)
    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: list of [node, volume]
        for missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # nv_dict is keyed by (node, lv_name) and maps to the owning instance
    # (as provided by _MapInstanceDisksToNodes)
    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      # Only query nodes we hold a lock on and which are also listed by
      # GetVmCapableNodeList
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        # Offline nodes are skipped silently, without recording an error
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        # Every LV reported by the node is removed from nv_dict; only the
        # third element of the per-LV tuple (the online flag) is used
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          # A known LV which is not online marks its instance as needing
          # its disks activated
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
3603
3604
class LUClusterRepairDiskSizes(NoHooksLU):
  """Reconciles the recorded disk sizes with what the nodes report.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the instance and node resource locks to acquire.

    """
    requested = self.op.instances
    if requested:
      self.wanted_names = _GetWantedInstances(self, requested)
      # The node resource locks are filled in by DeclareLocks
      self.needed_locks = {
        locking.LEVEL_INSTANCE: self.wanted_names,
        locking.LEVEL_NODE_RES: [],
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      # No explicit list given, work on everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        }

    self.share_locks = {
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_RES: 1,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    Falls back to all owned instance locks if no instance list was given
    and loads the configuration objects of the wanted instances.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    # GetMultiInstanceInfo yields pairs, only the second item is needed
    self.wanted_instances = \
        [info[1] for info in self.cfg.GetMultiInstanceInfo(self.wanted_names)]

  def _EnsureChildSizes(self, disk):
    """Grow too-small first children of DRBD8 disks to the parent's size.

    Only disks of type L{constants.LD_DRBD8} are handled; the check is
    applied recursively to the first child.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether any child size was modified

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller;
        handed on to the configuration update
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    # Group the disks of all wanted instances by primary node
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      node_disks.extend((instance, idx, disk)
                        for (idx, disk) in enumerate(instance.disks))

    assert not (frozenset(per_node_disks) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, disk_entries) in per_node_disks.items():
      # Query the node using copies, so that setting the disk ID does not
      # touch the configuration objects
      disk_copies = [entry[2].Copy() for entry in disk_entries]
      for dcopy in disk_copies:
        self.cfg.SetDiskID(dcopy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(disk_entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(disk_entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(disk_entries, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
        elif not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
        else:
          # The reported value is scaled down by 2**20 before being compared
          # with the recorded size (presumably bytes vs. mebibytes)
          actual = size >> 20
          if actual != disk.size:
            self.LogInfo("Disk %d of instance %s has mismatched size,"
                         " correcting: recorded %d, actual %d", idx,
                         instance.name, disk.size, actual)
            disk.size = actual
            self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, actual))

          if self._EnsureChildSizes(disk):
            self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
3725
3726
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name as C{OP_TARGET} and the
        requested one as C{NEW_NAME}

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list holding only the master node, the list
        returned by C{GetNodeList})

    """
    master_only = [self.cfg.GetMasterNode()]
    node_list = self.cfg.GetNodeList()
    return (master_only, node_list)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and the result stored in C{self.op.name} and
    C{self.ip}.

    @raise errors.OpPrereqError: if neither name nor IP would change, or
        if the new IP already answers on the node daemon port

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())
    (new_name, new_ip) = (hostname.name, hostname.ip)
    self.ip = new_ip

    same_name = new_name == self.cfg.GetClusterName()
    same_ip = new_ip == self.cfg.GetMasterIP()

    if same_name and same_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new IP must not be in use yet
    if not same_ip and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: function used to send feedback back to the caller;
        handed on to the configuration update
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    shutdown = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
    shutdown.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # ... and upload it to all online nodes except the master
      other_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in other_nodes:
        other_nodes.remove(master_params.name)
      _UploadHelper(self, other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is brought up again (with the new address) even if
      # the steps above failed
      master_params.ip = new_ip
      startup = self.rpc.call_node_activate_master_ip(master_params.name,
                                                      master_params, ems)
      failure = startup.fail_msg
      if failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", failure)

    return new_name
3810
3811
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the
  cluster's primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error always
      carries L{errors.ECODE_INVAL} as error code

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # An unknown family in the configuration is reported as a prerequisite
    # failure instead of a programming error
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    # Explicit one-element tuple, so that any value can be formatted
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3831
3832
3833 class LUClusterSetParams(LogicalUnit):
3834   """Change the parameters of the cluster.
3835
3836   """
3837   HPATH = "cluster-modify"
3838   HTYPE = constants.HTYPE_CLUSTER
3839   REQ_BGL = False
3840
3841   def CheckArguments(self):
3842     """Check parameters
3843
3844     """
3845     if self.op.uid_pool:
3846       uidpool.CheckUidPool(self.op.uid_pool)
3847
3848     if self.op.add_uids:
3849       uidpool.CheckUidPool(self.op.add_uids)
3850
3851     if self.op.remove_uids:
3852       uidpool.CheckUidPool(self.op.remove_uids)
3853
3854     if self.op.master_netmask is not None:
3855       _ValidateNetmask(self.cfg, self.op.master_netmask)
3856
3857     if self.op.diskparams:
3858       for dt_params in self.op.diskparams.values():
3859         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3860       try:
3861         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3862       except errors.OpPrereqError, err:
3863         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3864                                    errors.ECODE_INVAL)
3865
3866   def ExpandNames(self):
3867     # FIXME: in the future maybe other cluster params won't require checking on
3868     # all nodes to be modified.
3869     self.needed_locks = {
3870       locking.LEVEL_NODE: locking.ALL_SET,
3871       locking.LEVEL_INSTANCE: locking.ALL_SET,
3872       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3873     }
3874     self.share_locks = {
3875         locking.LEVEL_NODE: 1,
3876         locking.LEVEL_INSTANCE: 1,
3877         locking.LEVEL_NODEGROUP: 1,
3878     }
3879
3880   def BuildHooksEnv(self):
3881     """Build hooks env.
3882
3883     """
3884     return {
3885       "OP_TARGET": self.cfg.GetClusterName(),
3886       "NEW_VG_NAME": self.op.vg_name,
3887       }
3888
3889   def BuildHooksNodes(self):
3890     """Build hooks nodes.
3891
3892     """
3893     mn = self.cfg.GetMasterNode()
3894     return ([mn], [mn])
3895
3896   def CheckPrereq(self):
3897     """Check prerequisites.
3898
3899     This checks whether the given params don't conflict and
3900     if the given volume group is valid.
3901
3902     """
3903     if self.op.vg_name is not None and not self.op.vg_name:
3904       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3905         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3906                                    " instances exist", errors.ECODE_INVAL)
3907
3908     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3909       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3910         raise errors.OpPrereqError("Cannot disable drbd helper while"
3911                                    " drbd-based instances exist",
3912                                    errors.ECODE_INVAL)
3913
3914     node_list = self.owned_locks(locking.LEVEL_NODE)
3915
3916     # if vg_name not None, checks given volume group on all nodes
3917     if self.op.vg_name:
3918       vglist = self.rpc.call_vg_list(node_list)
3919       for node in node_list:
3920         msg = vglist[node].fail_msg
3921         if msg:
3922           # ignoring down node
3923           self.LogWarning("Error while gathering data on node %s"
3924                           " (ignoring node): %s", node, msg)
3925           continue
3926         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3927                                               self.op.vg_name,
3928                                               constants.MIN_VG_SIZE)
3929         if vgstatus:
3930           raise errors.OpPrereqError("Error on node '%s': %s" %
3931                                      (node, vgstatus), errors.ECODE_ENVIRON)
3932
3933     if self.op.drbd_helper:
3934       # checks given drbd helper on all nodes
3935       helpers = self.rpc.call_drbd_helper(node_list)
3936       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3937         if ninfo.offline:
3938           self.LogInfo("Not checking drbd helper on offline node %s", node)
3939           continue
3940         msg = helpers[node].fail_msg
3941         if msg:
3942           raise errors.OpPrereqError("Error checking drbd helper on node"
3943                                      " '%s': %s" % (node, msg),
3944                                      errors.ECODE_ENVIRON)
3945         node_helper = helpers[node].payload
3946         if node_helper != self.op.drbd_helper:
3947           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3948                                      (node, node_helper), errors.ECODE_ENVIRON)
3949
3950     self.cluster = cluster = self.cfg.GetClusterInfo()
3951     # validate params changes
3952     if self.op.beparams:
3953       objects.UpgradeBeParams(self.op.beparams)
3954       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3955       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3956
3957     if self.op.ndparams:
3958       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3959       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3960
3961       # TODO: we need a more general way to handle resetting
3962       # cluster-level parameters to default values
3963       if self.new_ndparams["oob_program"] == "":
3964         self.new_ndparams["oob_program"] = \
3965             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3966
3967     if self.op.hv_state:
3968       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3969                                             self.cluster.hv_state_static)
3970       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3971                                for hv, values in new_hv_state.items())
3972
3973     if self.op.disk_state:
3974       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3975                                                 self.cluster.disk_state_static)
3976       self.new_disk_state = \
3977         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3978                             for name, values in svalues.items()))
3979              for storage, svalues in new_disk_state.items())
3980
3981     if self.op.ipolicy:
3982       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3983                                             group_policy=False)
3984
3985       all_instances = self.cfg.GetAllInstancesInfo().values()
3986       violations = set()
3987       for group in self.cfg.GetAllNodeGroupsInfo().values():
3988         instances = frozenset([inst for inst in all_instances
3989                                if compat.any(node in group.members
3990                                              for node in inst.all_nodes)])
3991         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3992         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3993                                                                    group),
3994                                             new_ipolicy, instances)
3995         if new:
3996           violations.update(new)
3997
3998       if violations:
3999         self.LogWarning("After the ipolicy change the following instances"
4000                         " violate them: %s",
4001                         utils.CommaJoin(utils.NiceSort(violations)))
4002
4003     if self.op.nicparams:
4004       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4005       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4006       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4007       nic_errors = []
4008
4009       # check all instances for consistency
4010       for instance in self.cfg.GetAllInstancesInfo().values():
4011         for nic_idx, nic in enumerate(instance.nics):
4012           params_copy = copy.deepcopy(nic.nicparams)
4013           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4014
4015           # check parameter syntax
4016           try:
4017             objects.NIC.CheckParameterSyntax(params_filled)
4018           except errors.ConfigurationError, err:
4019             nic_errors.append("Instance %s, nic/%d: %s" %
4020                               (instance.name, nic_idx, err))
4021
4022           # if we're moving instances to routed, check that they have an ip
4023           target_mode = params_filled[constants.NIC_MODE]
4024           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4025             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4026                               " address" % (instance.name, nic_idx))
4027       if nic_errors:
4028         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4029                                    "\n".join(nic_errors))
4030
4031     # hypervisor list/parameters
4032     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4033     if self.op.hvparams:
4034       for hv_name, hv_dict in self.op.hvparams.items():
4035         if hv_name not in self.new_hvparams:
4036           self.new_hvparams[hv_name] = hv_dict
4037         else:
4038           self.new_hvparams[hv_name].update(hv_dict)
4039
4040     # disk template parameters
4041     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4042     if self.op.diskparams:
4043       for dt_name, dt_params in self.op.diskparams.items():
4044         if dt_name not in self.op.diskparams:
4045           self.new_diskparams[dt_name] = dt_params
4046         else:
4047           self.new_diskparams[dt_name].update(dt_params)
4048
4049     # os hypervisor parameters
4050     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4051     if self.op.os_hvp:
4052       for os_name, hvs in self.op.os_hvp.items():
4053         if os_name not in self.new_os_hvp:
4054           self.new_os_hvp[os_name] = hvs
4055         else:
4056           for hv_name, hv_dict in hvs.items():
4057             if hv_name not in self.new_os_hvp[os_name]:
4058               self.new_os_hvp[os_name][hv_name] = hv_dict
4059             else:
4060               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4061
4062     # os parameters
4063     self.new_osp = objects.FillDict(cluster.osparams, {})
4064     if self.op.osparams:
4065       for os_name, osp in self.op.osparams.items():
4066         if os_name not in self.new_osp:
4067           self.new_osp[os_name] = {}
4068
4069         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4070                                                   use_none=True)
4071
4072         if not self.new_osp[os_name]:
4073           # we removed all parameters
4074           del self.new_osp[os_name]
4075         else:
4076           # check the parameter validity (remote check)
4077           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4078                          os_name, self.new_osp[os_name])
4079
4080     # changes to the hypervisor list
4081     if self.op.enabled_hypervisors is not None:
4082       self.hv_list = self.op.enabled_hypervisors
4083       for hv in self.hv_list:
4084         # if the hypervisor doesn't already exist in the cluster
4085         # hvparams, we initialize it to empty, and then (in both
4086         # cases) we make sure to fill the defaults, as we might not
4087         # have a complete defaults list if the hypervisor wasn't
4088         # enabled before
4089         if hv not in new_hvp:
4090           new_hvp[hv] = {}
4091         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4092         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4093     else:
4094       self.hv_list = cluster.enabled_hypervisors
4095
4096     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4097       # either the enabled list has changed, or the parameters have, validate
4098       for hv_name, hv_params in self.new_hvparams.items():
4099         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4100             (self.op.enabled_hypervisors and
4101              hv_name in self.op.enabled_hypervisors)):
4102           # either this is a new hypervisor, or its parameters have changed
4103           hv_class = hypervisor.GetHypervisor(hv_name)
4104           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4105           hv_class.CheckParameterSyntax(hv_params)
4106           _CheckHVParams(self, node_list, hv_name, hv_params)
4107
4108     if self.op.os_hvp:
4109       # no need to check any newly-enabled hypervisors, since the
4110       # defaults have already been checked in the above code-block
4111       for os_name, os_hvp in self.new_os_hvp.items():
4112         for hv_name, hv_params in os_hvp.items():
4113           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4114           # we need to fill in the new os_hvp on top of the actual hv_p
4115           cluster_defaults = self.new_hvparams.get(hv_name, {})
4116           new_osp = objects.FillDict(cluster_defaults, hv_params)
4117           hv_class = hypervisor.GetHypervisor(hv_name)
4118           hv_class.CheckParameterSyntax(new_osp)
4119           _CheckHVParams(self, node_list, hv_name, new_osp)
4120
4121     if self.op.default_iallocator:
4122       alloc_script = utils.FindFile(self.op.default_iallocator,
4123                                     constants.IALLOCATOR_SEARCH_PATH,
4124                                     os.path.isfile)
4125       if alloc_script is None:
4126         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4127                                    " specified" % self.op.default_iallocator,
4128                                    errors.ECODE_INVAL)
4129
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Every modification requested through the opcode is applied to the
    cluster object (C{self.cluster}), which is then written back using
    C{self.cfg.Update}. The volume group name and the DRBD helper are the
    exception, as they are set directly through C{self.cfg}.

    If the master network device is changed, the master IP is shut down
    on the current device before the configuration is updated and started
    again afterwards.

    @param feedback_fn: function called with the progress/notice messages

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # an empty value is stored as None
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      # same convention as for the volume group: empty is stored as None
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    # NOTE(review): the self.new_* values used below are not computed in this
    # method; they are expected to have been prepared before Exec is called
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # a change of the enabled hypervisors also stores the hypervisor
      # parameters, not only the new list
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      # the setting is stored even if confd is not available, the user only
      # gets a notice
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    # additions and removals are applied to the current pool first; an
    # explicitly given uid_pool then replaces the pool altogether
    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      """Apply add/remove modifications to one of the cluster's OS lists.

      The list is modified in place; adding an OS which is already listed
      or removing one which is not listed is only reported via feedback_fn.

      @param aname: name of the cluster attribute holding the list
      @param mods: sequence of (action, OS name) pairs, the action being
          constants.DDM_ADD or constants.DDM_REMOVE
      @param desc: short description of the list, used in the messages
      @raise errors.ProgrammerError: for any other action

      """
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # the master IP is taken down on the current device before the new
      # device is recorded; it is started again after the configuration
      # update at the end of this method
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      # NOTE(review): Raise presumably aborts the whole operation if the RPC
      # failed - confirm against the RPC result class
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      # a failure is only reported; the new netmask is recorded in the
      # cluster object in any case
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    # write all the changes made to the cluster object
    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # the parameters are re-read only after the configuration update
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      # unlike the shutdown above, a failure here only produces a warning
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4270
4271
def _UploadHelper(lu, nodes, fname):
  """Upload a file to a set of nodes, warning about every failed copy.

  Nothing is done if the file does not exist locally.

  @param lu: calling logical unit, used for the RPC call and the warnings
  @param nodes: the nodes the file is uploaded to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    error = node_result.fail_msg
    if not error:
      continue
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, error))
4284
4285
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts},
      C{use_external_mip_script} and C{enabled_hypervisors} attributes
      are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: (files for all nodes, optional files,
      files for master candidates only, files for VM-capable nodes only)

  """
  # Optional files; each of them must
  # - also be listed in one of the other categories
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files going to all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files restricted to master candidates
  if redist:
    files_mc = set()
  else:
    files_mc = set([constants.CLUSTER_CONF_FILE])

  # Files restricted to VM-capable nodes: first element of what each
  # enabled hypervisor reports as ancillary files
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    files_vm.update(hv_files[0])

  # ... while the second element lists the optional ones
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    files_opt.update(hv_files[1])

  # A file name may appear in one category only
  required_files = files_all | files_mc | files_vm
  category_total = len(files_all) + len(files_mc) + len(files_vm)
  assert len(required_files) == category_total, \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert files_opt.issubset(required_files), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4349
4350
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The files are uploaded to the online nodes (and the additional ones, if
  any), never to the master node itself.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Collect the nodes to upload to
  cluster = cfg.GetClusterInfo()
  master_name = cfg.GetNodeInfo(cfg.GetMasterNode()).name

  all_targets = cfg.GetOnlineNodeList()
  online_set = frozenset(all_targets)
  vm_targets = list(online_set.intersection(cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # The master node must never be a target
  for targets in (all_targets, vm_targets):
    try:
      targets.remove(master_name)
    except ValueError:
      # master was not in this list
      pass

  # Collect the files to upload
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file itself is never re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Do the uploads, one file at a time
  for fname in files_all:
    _UploadHelper(lu, all_targets, fname)

  for fname in files_vm:
    _UploadHelper(lu, vm_targets, fname)
4400
4401
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU locks all nodes in shared mode, writes the configuration again
  and re-uploads the ancillary files.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare a shared lock on all nodes.

    """
    node_level = locking.LEVEL_NODE

    self.needed_locks = {
      node_level: locking.ALL_SET,
    }
    self.share_locks[node_level] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function passed on to the configuration update

    """
    cfg = self.cfg
    cfg.Update(cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
4422
4423
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The master network parameters and the external-script setting are read
    from the configuration and passed to the activation RPC, whose result
    is checked with the message "Could not activate the master IP".

    """
    cfg = self.cfg
    net_params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()

    activation = self.rpc.call_node_activate_master_ip(net_params.name,
                                                       net_params,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4437
4438
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The master network parameters and the external-script setting are read
    from the configuration and passed to the deactivation RPC, whose result
    is checked with the message "Could not deactivate the master IP".

    """
    cfg = self.cfg
    net_params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()

    deactivation = self.rpc.call_node_deactivate_master_ip(net_params.name,
                                                           net_params,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4452
4453
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary node
  until no device reports a sync in progress anymore.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are checked
  @param disks: the disks to wait for; an empty list means there is nothing
      to wait for, while for None the selection is left to
      L{_ExpandCheckDisks} (presumably all disks of the instance)
  @type oneshot: boolean
  @param oneshot: if True, the status is queried only once (apart from the
      retries done for degraded disks) instead of until the sync is done
  @rtype: boolean
  @return: False if a disk was found degraded without being in sync
      progress, True otherwise
  @raise errors.RemoteError: if the mirror status can't be retrieved ten
      times in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # lower bound (in seconds) for the poll interval when a syncing device
  # gives no time estimate; without it the loop would not sleep at all
  no_estimate_delay = 5

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # only consecutive failures count
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a degraded device which is syncing is expected, only a degraded
      # device without sync progress counts as a problem
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # keep the largest estimate over all devices, not just the one
          # of the last device
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
          # sleep at least a bit between polls
          max_time = max(max_time, no_estimate_delay)
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # never wait more than a minute between two polls
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
4527
4528
def _BlockdevFind(lu, node, dev, instance):
  """Run call_blockdev_find on a disk annotated via L{_AnnotateDiskParams}.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one disk went in, exactly one is expected back
  [annotated_dev] = annotated
  return lu.rpc.call_blockdev_find(node, annotated_dev)
4541
4542
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  The device is passed through L{_AnnotateDiskParams} first; all other
  arguments are handed over unchanged.

  """
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one disk went in, exactly one is expected back
  [annotated_dev] = annotated
  return _CheckDiskConsistencyInner(lu, instance, annotated_dev, node,
                                    on_primary, ldisk=ldisk)
4550
4551
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  By default a device is tested through its is_degraded attribute
  (overall non-ok status of the device(s)); with ldisk set to True the
  test is made on the ldisk status (the local storage status) instead.
  The children of the device are checked recursively, always with the
  default test, and only as long as no problem was found.

  @return: whether the device and its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    find_result = lu.rpc.call_blockdev_find(node, dev)
    error = find_result.fail_msg
    if error:
      lu.LogWarning("Can't find disk on node %s: %s", node, error)
      return False
    if not find_result.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      return False

    if ldisk:
      consistent = find_result.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not find_result.payload.is_degraded

  for child in (dev.children or ()):
    if not consistent:
      # a problem was found already, the remaining children are not queried
      break
    consistent = _CheckDiskConsistencyInner(lu, instance, child, node,
                                            on_primary)

  return consistent
4588
4589
4590 class LUOobCommand(NoHooksLU):
4591   """Logical unit for OOB handling.
4592
4593   """
4594   REQ_BGL = False
4595   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4596
4597   def ExpandNames(self):
4598     """Gather locks we need.
4599
4600     """
4601     if self.op.node_names:
4602       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4603       lock_names = self.op.node_names
4604     else:
4605       lock_names = locking.ALL_SET
4606
4607     self.needed_locks = {
4608       locking.LEVEL_NODE: lock_names,
4609       }
4610
4611   def CheckPrereq(self):
4612     """Check prerequisites.
4613
4614     This checks:
4615      - the node exists in the configuration
4616      - OOB is supported
4617
4618     Any errors are signaled by raising errors.OpPrereqError.
4619
4620     """
4621     self.nodes = []
4622     self.master_node = self.cfg.GetMasterNode()
4623
4624     assert self.op.power_delay >= 0.0
4625
4626     if self.op.node_names:
4627       if (self.op.command in self._SKIP_MASTER and
4628           self.master_node in self.op.node_names):
4629         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4630         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4631
4632         if master_oob_handler:
4633           additional_text = ("run '%s %s %s' if you want to operate on the"
4634                              " master regardless") % (master_oob_handler,
4635                                                       self.op.command,
4636                                                       self.master_node)
4637         else:
4638           additional_text = "it does not support out-of-band operations"
4639
4640         raise errors.OpPrereqError(("Operating on the master node %s is not"
4641                                     " allowed for %s; %s") %
4642                                    (self.master_node, self.op.command,
4643                                     additional_text), errors.ECODE_INVAL)
4644     else:
4645       self.op.node_names = self.cfg.GetNodeList()
4646       if self.op.command in self._SKIP_MASTER:
4647         self.op.node_names.remove(self.master_node)
4648
4649     if self.op.command in self._SKIP_MASTER:
4650       assert self.master_node not in self.op.node_names
4651
4652     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4653       if node is None:
4654         raise errors.OpPrereqError("Node %s not found" % node_name,
4655                                    errors.ECODE_NOENT)
4656       else:
4657         self.nodes.append(node)
4658
4659       if (not self.op.ignore_status and
4660           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4661         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4662                                     " not marked offline") % node_name,
4663                                    errors.ECODE_STATE)
4664
4665   def Exec(self, feedback_fn):
4666     """Execute OOB and return result if we expect any.
4667
4668     """
4669     master_node = self.master_node
4670     ret = []
4671
4672     for idx, node in enumerate(utils.NiceSort(self.nodes,
4673                                               key=lambda node: node.name)):
4674       node_entry = [(constants.RS_NORMAL, node.name)]
4675       ret.append(node_entry)
4676
4677       oob_program = _SupportsOob(self.cfg, node)
4678
4679       if not oob_program:
4680         node_entry.append((constants.RS_UNAVAIL, None))
4681         continue
4682
4683       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4684                    self.op.command, oob_program, node.name)
4685       result = self.rpc.call_run_oob(master_node, oob_program,
4686                                      self.op.command, node.name,
4687                                      self.op.timeout)
4688
4689       if result.fail_msg:
4690         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4691                         node.name, result.fail_msg)
4692         node_entry.append((constants.RS_NODATA, None))
4693       else:
4694         try:
4695           self._CheckPayload(result)
4696         except errors.OpExecError, err:
4697           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4698                           node.name, err)
4699           node_entry.append((constants.RS_NODATA, None))
4700         else:
4701           if self.op.command == constants.OOB_HEALTH:
4702             # For health we should log important events
4703             for item, status in result.payload:
4704               if status in [constants.OOB_STATUS_WARNING,
4705                             constants.OOB_STATUS_CRITICAL]:
4706                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4707                                 item, node.name, status)
4708
4709           if self.op.command == constants.OOB_POWER_ON:
4710             node.powered = True
4711           elif self.op.command == constants.OOB_POWER_OFF:
4712             node.powered = False
4713           elif self.op.command == constants.OOB_POWER_STATUS:
4714             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4715             if powered != node.powered:
4716               logging.warning(("Recorded power state (%s) of node '%s' does not"
4717                                " match actual power state (%s)"), node.powered,
4718                               node.name, powered)
4719
4720           # For configuration changing commands we should update the node
4721           if self.op.command in (constants.OOB_POWER_ON,
4722                                  constants.OOB_POWER_OFF):
4723             self.cfg.Update(node, feedback_fn)
4724
4725           node_entry.append((constants.RS_NORMAL, result.payload))
4726
4727           if (self.op.command == constants.OOB_POWER_ON and
4728               idx < len(self.nodes) - 1):
4729             time.sleep(self.op.power_delay)
4730
4731     return ret
4732
4733   def _CheckPayload(self, result):
4734     """Checks if the payload is valid.
4735
4736     @param result: RPC result
4737     @raises errors.OpExecError: If payload is not valid
4738
4739     """
4740     errs = []
4741     if self.op.command == constants.OOB_HEALTH:
4742       if not isinstance(result.payload, list):
4743         errs.append("command 'health' is expected to return a list but got %s" %
4744                     type(result.payload))
4745       else:
4746         for item, status in result.payload:
4747           if status not in constants.OOB_STATUSES:
4748             errs.append("health item '%s' has invalid status '%s'" %
4749                         (item, status))
4750
4751     if self.op.command == constants.OOB_POWER_STATUS:
4752       if not isinstance(result.payload, dict):
4753         errs.append("power-status is expected to return a dict but got %s" %
4754                     type(result.payload))
4755
4756     if self.op.command in [
4757         constants.OOB_POWER_ON,
4758         constants.OOB_POWER_OFF,
4759         constants.OOB_POWER_CYCLE,
4760         ]:
4761       if result.payload is not None:
4762         errs.append("%s is expected to not return payload but got '%s'" %
4763                     (self.op.command, result.payload))
4764
4765     if errs:
4766       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4767                                utils.CommaJoin(errs))
4768
4769
class _OsQuery(_QueryBase):
  """Query implementation for OS definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declares an empty lock set and records the wanted OS names.

    @param lu: the logical unit on whose behalf the query runs

    """
    # No locks are requested for the time being; this is meant to be
    # temporary and should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are declared by L{ExpandNames}.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as
        values; the payload of each result is a list of
        (name, path, status, diagnose, variants, parameters,
        api_versions) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of (path, status, diagnose,
        variants, parameters, api_versions) tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Nodes whose RPC call failed are left out completely, so that a
    # non-responding node daemon doesn't make all OSes invalid
    responsive = [nname for (nname, nres) in rlist.items()
                  if not nres.fail_msg]

    by_os = {}
    for (node_name, node_res) in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue

      for entry in node_res.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = entry

        per_node = by_os.get(os_name)
        if per_node is None:
          # first time this OS is seen: every responsive node starts
          # out with an empty list
          per_node = by_os[os_name] = dict((nname, [])
                                           for nname in responsive)

        # parameters arrive as [name, help] and are stored as (name, help)
        per_node[node_name].append((path, status, diagnose, variants,
                                    [tuple(v) for v in params],
                                    api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: list of L{query.OsInfo} objects in the requested order

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # Only online, vm_capable nodes are asked
    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      # accumulators for (variants, parameters, api_versions)
      common = (set(), set(), set())

      for (idx, osl) in enumerate(os_data.values()):
        # Only the first entry of each node is looked at; a node without
        # entries or with a bad status makes the OS invalid
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        reported = (node_variants, node_params, node_api)

        for (acc, values) in zip(common, reported):
          if idx == 0:
            # First entry
            acc.update(values)
          else:
            # Filter out inconsistent values
            acc.intersection_update(values)

      (variants, parameters, api_versions) = common
      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered if name in data]
4885
4886
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter combined with the status filter; if one of
        them is empty, the other one is returned on its own

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Creates the OS query object; locking is never requested.

    """
    qfilter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(qfilter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates to the OS query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4929
4930
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the name of the node being removed

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is taken out of the node list used for both
    phases; running the pre phase on it would make a failed node
    impossible to remove.

    """
    remaining = self.cfg.GetNodeList()
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - it does not have primary or secondary instances

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (iname, inst) in all_instances.items():
      if node.name in inst.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % iname,
                                   errors.ECODE_INVAL)

    self.node = node
    self.op.node_name = node.name

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: feedback function (not used here)

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # Read before the node is removed from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node itself
    _RunPostHook(self, node_name)

    # A failure here is only reported, it does not abort the removal
    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    # Remove node from our /etc/hosts
    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
5023
5024
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Records the wanted nodes and declares node locks if needed.

    Node locks are only declared when locking was asked for and live
    data (C{query.NQ_LIVE}) is among the requested data.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # If any non-static field is requested we need to lock the nodes
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.NodeQueryData} object; parts which were not
        requested are passed as None (or an empty dict for the groups)

    """
    requested = self.requested_data
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames
                       if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # failed calls and empty payloads contribute nothing
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      # Instances on nodes which are not part of the query are ignored
      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())

    groups = {}
    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_list = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_list, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5099
5100
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the node query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the node query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5120
5121
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares the node locks: the given nodes, or all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = wanted

  @staticmethod
  def _GetVolumeField(field, node, vol, vol2inst):
    """Returns the value of one output field for one volume.

    @param field: the name of the output field
    @param node: the name of the node the volume was reported by
    @param vol: the volume data, a dict with "dev", "vg", "name" and
        "size" entries
    @param vol2inst: map from (node, "vg/name") to the owning instance
    @raise errors.ParameterError: if the field is not known

    """
    if field == "node":
      return node
    if field == "phys":
      return vol["dev"]
    if field == "vg":
      return vol["vg"]
    if field == "name":
      return vol["name"]
    if field == "size":
      return int(float(vol["size"]))
    if field == "instance":
      # volumes which are not used by any instance are shown as "-"
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes whose call failed are
    skipped with a warning.

    @return: a list of rows, one per volume, each holding the requested
        fields converted to strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      # volumes of a node are listed ordered by their device
      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        output.append([str(self._GetVolumeField(field, node, vol, vol2inst))
                       for field in self.op.output_fields])

    return output
5190
5191
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the selected output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares the node locks: the given nodes, or all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose call failed are
    skipped with a warning.

    @return: a list of rows, one per storage unit, each holding the
        values of the requested fields

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # Position of every queried field within a result row
    field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5273
5274
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Records the wanted instances and prepares the lock declarations.

    Locks are only declared when locking was asked for and live data
    (C{query.IQ_LIVE}) is among the requested data. Group locks are
    additionally only used when node data (C{query.IQ_NODES}) is
    requested.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # The group and node levels start out empty and are filled in by
      # DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Fills in the node group and node level locks.

    Nothing is done unless locking is in use (see L{ExpandNames}).

    @param lu: the logical unit on whose behalf the query runs
    @param level: the locking level being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        # Delegated to the LU's own helper
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    Calls L{_CheckInstanceNodeGroups} for every owned instance lock,
    passing the set of owned node group locks.

    @param lu: the logical unit on whose behalf the query runs

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.InstanceQueryData} object; disk usage, console
        information, nodes and groups are None unless requested

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # Names of all nodes used by the selected instances; note that "nodes"
    # is rebound further down when query.IQ_NODES is requested
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # only the entry for "inst", so entries of instances which
                # end up in wrongnode_inst or which are orphans are added
                # to live_data too as soon as one instance has this node as
                # primary -- confirm this is intended before changing it
                live_data.update(result.payload)
              else:
                # Reported by a node which is not the instance's primary
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      # Computed from the configured disk sizes via _ComputeDiskSize
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" maps node names to node objects
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5405
5406
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by C{self.op.what}; all the work is delegated to the
  query implementation returned for it by L{_GetQueryImplementation}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    impl_cls = _GetQueryImplementation(self.op.what)

    self.impl = impl_cls(self.op.qfilter, self.op.fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5427
5428
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the selected fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5444
5445
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type can not be modified
        at all or if one of the fields to change is not modifiable

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    st_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[st_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % st_type,
                                 errors.ECODE_INVAL)

    # Anything which is to be changed but isn't modifiable is an error
    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (st_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    The RPC result's C{Raise} method is called to report a failure.

    """
    node_name = self.op.node_name
    unit_name = self.op.name

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(node_name,
                                          self.op.storage_type, st_args,
                                          unit_name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (unit_name, node_name))
5486
5487
5488 class LUNodeAdd(LogicalUnit):
5489   """Logical unit for adding node to the cluster.
5490
5491   """
5492   HPATH = "node-add"
5493   HTYPE = constants.HTYPE_NODE
5494   _NFLAGS = ["master_capable", "vm_capable"]
5495
5496   def CheckArguments(self):
5497     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5498     # validate/normalize the node name
5499     self.hostname = netutils.GetHostname(name=self.op.node_name,
5500                                          family=self.primary_ip_family)
5501     self.op.node_name = self.hostname.name
5502
5503     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5504       raise errors.OpPrereqError("Cannot readd the master node",
5505                                  errors.ECODE_STATE)
5506
5507     if self.op.readd and self.op.group:
5508       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5509                                  " being readded", errors.ECODE_INVAL)
5510
5511   def BuildHooksEnv(self):
5512     """Build hooks env.
5513
5514     This will run on all nodes before, and on all nodes + the new node after.
5515
5516     """
5517     return {
5518       "OP_TARGET": self.op.node_name,
5519       "NODE_NAME": self.op.node_name,
5520       "NODE_PIP": self.op.primary_ip,
5521       "NODE_SIP": self.op.secondary_ip,
5522       "MASTER_CAPABLE": str(self.op.master_capable),
5523       "VM_CAPABLE": str(self.op.vm_capable),
5524       }
5525
5526   def BuildHooksNodes(self):
5527     """Build hooks nodes.
5528
5529     """
5530     # Exclude added node
5531     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5532     post_nodes = pre_nodes + [self.op.node_name, ]
5533
5534     return (pre_nodes, post_nodes)
5535
5536   def CheckPrereq(self):
5537     """Check prerequisites.
5538
5539     This checks:
5540      - the new node is not already in the config
5541      - it is resolvable
5542      - its parameters (single/dual homed) matches the cluster
5543
5544     Any errors are signaled by raising errors.OpPrereqError.
5545
5546     """
5547     cfg = self.cfg
5548     hostname = self.hostname
5549     node = hostname.name
5550     primary_ip = self.op.primary_ip = hostname.ip
5551     if self.op.secondary_ip is None:
5552       if self.primary_ip_family == netutils.IP6Address.family:
5553         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5554                                    " IPv4 address must be given as secondary",
5555                                    errors.ECODE_INVAL)
5556       self.op.secondary_ip = primary_ip
5557
5558     secondary_ip = self.op.secondary_ip
5559     if not netutils.IP4Address.IsValid(secondary_ip):
5560       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5561                                  " address" % secondary_ip, errors.ECODE_INVAL)
5562
5563     node_list = cfg.GetNodeList()
5564     if not self.op.readd and node in node_list:
5565       raise errors.OpPrereqError("Node %s is already in the configuration" %
5566                                  node, errors.ECODE_EXISTS)
5567     elif self.op.readd and node not in node_list:
5568       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5569                                  errors.ECODE_NOENT)
5570
5571     self.changed_primary_ip = False
5572
5573     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5574       if self.op.readd and node == existing_node_name:
5575         if existing_node.secondary_ip != secondary_ip:
5576           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5577                                      " address configuration as before",
5578                                      errors.ECODE_INVAL)
5579         if existing_node.primary_ip != primary_ip:
5580           self.changed_primary_ip = True
5581
5582         continue
5583
5584       if (existing_node.primary_ip == primary_ip or
5585           existing_node.secondary_ip == primary_ip or
5586           existing_node.primary_ip == secondary_ip or
5587           existing_node.secondary_ip == secondary_ip):
5588         raise errors.OpPrereqError("New node ip address(es) conflict with"
5589                                    " existing node %s" % existing_node.name,
5590                                    errors.ECODE_NOTUNIQUE)
5591
5592     # After this 'if' block, None is no longer a valid value for the
5593     # _capable op attributes
5594     if self.op.readd:
5595       old_node = self.cfg.GetNodeInfo(node)
5596       assert old_node is not None, "Can't retrieve locked node %s" % node
5597       for attr in self._NFLAGS:
5598         if getattr(self.op, attr) is None:
5599           setattr(self.op, attr, getattr(old_node, attr))
5600     else:
5601       for attr in self._NFLAGS:
5602         if getattr(self.op, attr) is None:
5603           setattr(self.op, attr, True)
5604
5605     if self.op.readd and not self.op.vm_capable:
5606       pri, sec = cfg.GetNodeInstances(node)
5607       if pri or sec:
5608         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5609                                    " flag set to false, but it already holds"
5610                                    " instances" % node,
5611                                    errors.ECODE_STATE)
5612
5613     # check that the type of the node (single versus dual homed) is the
5614     # same as for the master
5615     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5616     master_singlehomed = myself.secondary_ip == myself.primary_ip
5617     newbie_singlehomed = secondary_ip == primary_ip
5618     if master_singlehomed != newbie_singlehomed:
5619       if master_singlehomed:
5620         raise errors.OpPrereqError("The master has no secondary ip but the"
5621                                    " new node has one",
5622                                    errors.ECODE_INVAL)
5623       else:
5624         raise errors.OpPrereqError("The master has a secondary ip but the"
5625                                    " new node doesn't have one",
5626                                    errors.ECODE_INVAL)
5627
5628     # checks reachability
5629     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5630       raise errors.OpPrereqError("Node not reachable by ping",
5631                                  errors.ECODE_ENVIRON)
5632
5633     if not newbie_singlehomed:
5634       # check reachability from my secondary ip to newbie's secondary ip
5635       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5636                            source=myself.secondary_ip):
5637         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5638                                    " based ping to node daemon port",
5639                                    errors.ECODE_ENVIRON)
5640
5641     if self.op.readd:
5642       exceptions = [node]
5643     else:
5644       exceptions = []
5645
5646     if self.op.master_capable:
5647       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5648     else:
5649       self.master_candidate = False
5650
5651     if self.op.readd:
5652       self.new_node = old_node
5653     else:
5654       node_group = cfg.LookupNodeGroup(self.op.group)
5655       self.new_node = objects.Node(name=node,
5656                                    primary_ip=primary_ip,
5657                                    secondary_ip=secondary_ip,
5658                                    master_candidate=self.master_candidate,
5659                                    offline=False, drained=False,
5660                                    group=node_group)
5661
5662     if self.op.ndparams:
5663       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5664
5665     if self.op.hv_state:
5666       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5667
5668     if self.op.disk_state:
5669       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5670
5671     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5672     #       it a property on the base class.
5673     result = rpc.DnsOnlyRunner().call_version([node])[node]
5674     result.Raise("Can't get version information from node %s" % node)
5675     if constants.PROTOCOL_VERSION == result.payload:
5676       logging.info("Communication to node %s fine, sw version %s match",
5677                    node, result.payload)
5678     else:
5679       raise errors.OpPrereqError("Version mismatch master version %s,"
5680                                  " node version %s" %
5681                                  (constants.PROTOCOL_VERSION, result.payload),
5682                                  errors.ECODE_ENVIRON)
5683
5684   def Exec(self, feedback_fn):
5685     """Adds the new node to the cluster.
5686
5687     """
5688     new_node = self.new_node
5689     node = new_node.name
5690
5691     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5692       "Not owning BGL"
5693
5694     # We adding a new node so we assume it's powered
5695     new_node.powered = True
5696
5697     # for re-adds, reset the offline/drained/master-candidate flags;
5698     # we need to reset here, otherwise offline would prevent RPC calls
5699     # later in the procedure; this also means that if the re-add
5700     # fails, we are left with a non-offlined, broken node
5701     if self.op.readd:
5702       new_node.drained = new_node.offline = False # pylint: disable=W0201
5703       self.LogInfo("Readding a node, the offline/drained flags were reset")
5704       # if we demote the node, we do cleanup later in the procedure
5705       new_node.master_candidate = self.master_candidate
5706       if self.changed_primary_ip:
5707         new_node.primary_ip = self.op.primary_ip
5708
5709     # copy the master/vm_capable flags
5710     for attr in self._NFLAGS:
5711       setattr(new_node, attr, getattr(self.op, attr))
5712
5713     # notify the user about any possible mc promotion
5714     if new_node.master_candidate:
5715       self.LogInfo("Node will be a master candidate")
5716
5717     if self.op.ndparams:
5718       new_node.ndparams = self.op.ndparams
5719     else:
5720       new_node.ndparams = {}
5721
5722     if self.op.hv_state:
5723       new_node.hv_state_static = self.new_hv_state
5724
5725     if self.op.disk_state:
5726       new_node.disk_state_static = self.new_disk_state
5727
5728     # Add node to our /etc/hosts, and add key to known_hosts
5729     if self.cfg.GetClusterInfo().modify_etc_hosts:
5730       master_node = self.cfg.GetMasterNode()
5731       result = self.rpc.call_etc_hosts_modify(master_node,
5732                                               constants.ETC_HOSTS_ADD,
5733                                               self.hostname.name,
5734                                               self.hostname.ip)
5735       result.Raise("Can't update hosts file with new host data")
5736
5737     if new_node.secondary_ip != new_node.primary_ip:
5738       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5739                                False)
5740
5741     node_verify_list = [self.cfg.GetMasterNode()]
5742     node_verify_param = {
5743       constants.NV_NODELIST: ([node], {}),
5744       # TODO: do a node-net-test as well?
5745     }
5746
5747     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5748                                        self.cfg.GetClusterName())
5749     for verifier in node_verify_list:
5750       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5751       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5752       if nl_payload:
5753         for failed in nl_payload:
5754           feedback_fn("ssh/hostname verification failed"
5755                       " (checking from %s): %s" %
5756                       (verifier, nl_payload[failed]))
5757         raise errors.OpExecError("ssh/hostname verification failed")
5758
5759     if self.op.readd:
5760       _RedistributeAncillaryFiles(self)
5761       self.context.ReaddNode(new_node)
5762       # make sure we redistribute the config
5763       self.cfg.Update(new_node, feedback_fn)
5764       # and make sure the new node will not have old files around
5765       if not new_node.master_candidate:
5766         result = self.rpc.call_node_demote_from_mc(new_node.name)
5767         msg = result.fail_msg
5768         if msg:
5769           self.LogWarning("Node failed to demote itself from master"
5770                           " candidate status: %s" % msg)
5771     else:
5772       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5773                                   additional_vm=self.op.vm_capable)
5774       self.context.AddNode(new_node, self.proc.GetECId())
5775
5776
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  The three role flags (master candidate, drained, offline) are treated
  as mutually exclusive: only flag tuples with at most one flag set are
  known to L{_F2R}. CheckPrereq maps the current flags and the
  requested changes to an old and a new role, Exec applies the
  transition.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode arguments and derive the locking needs.

    Expands the node name, makes sure at least one modification was
    requested and computes C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances}.

    @raise errors.OpPrereqError: if no modification was passed, if more
        than one modification has the value C{True} or if the secondary
        IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # Note: this counts every modification equal to True, i.e. it also
    # covers the master_capable/vm_capable flags, not only the role flags
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC;
    # the flags are tri-state (None means "no change"), hence the explicit
    # comparisons against True/False
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a promotion of other nodes is allowed
    # and could become necessary
    self.lock_all = self.op.auto_promote and self.might_demote
    # Changing the secondary IP affects instances, see _InstanceFilter
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @param instance: the instance object to examine
    @return: whether the instance uses an internally mirrored disk
        template and has the modified node among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declare the node, node resource and instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: dictionary with the target node and the string
        representation of the requested flag values

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists, both made of the master node and
        the modified node

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the requested changes against the current state of
    the node and computes the role transition. On success the
    attributes C{node}, C{old_flags}, C{old_role} and C{new_role} are
    set; C{new_ndparams}, C{new_hv_state} and C{new_disk_state} are set
    only if the respective modification was requested.

    @raise errors.OpPrereqError: if one of the requested changes is not
        possible in the current state

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks; the set of affected instances was first
      # computed in ExpandNames and might have changed in the meantime
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      # A secondary IP was passed, hence lock_instances is set and
      # affected_instances was computed at the top of this method
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    Applies the values computed by CheckPrereq to the node object and
    writes it to the configuration.

    @param feedback_fn: callable passed on to the configuration update
    @return: list of (name, value) tuples for the changed capability
        flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Report only the flags which actually differ from the old ones
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed (exactly one of the two roles is "candidate")
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6111
6112
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not block on other
    jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    powercycle = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
6143
6144
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request no locks at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dictionary with version constants and the values read from
        the cluster configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to the enabled hypervisors;
    # every OS keeps its entry, even if nothing is left in it
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Hypervisor parameters of the enabled hypervisors only
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6216
6217
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  All the work is delegated to a L{_ClusterQuery} helper object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper for the requested output fields.

    """
    # NOTE(review): the positional arguments are presumably (filter,
    # fields, use_locking); confirm against _QueryBase
    requested_fields = self.op.output_fields
    self.cq = _ClusterQuery(None, requested_fields, False)

  def ExpandNames(self):
    """Let the query helper set up the names/locks of this LU.

    """
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward the lock declaration to the query helper.

    """
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its only result row.

    """
    rows = self.cq.OldStyleQuery(self)

    # exactly one row is expected from the query
    assert len(rows) == 1

    return rows[0]
6239
6240
class _ClusterQuery(_QueryBase):
  """Query helper for cluster-level data.

  """
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Set up the (empty) locking of the LU and refuse locked queries.

    @param lu: the logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    raise errors.OpPrereqError("Can not use locking for cluster queries",
                               errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Nothing to declare, cluster queries do not use locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the cluster data needed by the requested fields.

    Data which was not requested is passed on as C{NotImplemented}.

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.ClusterQueryData} built from the cluster
        configuration, the job queue drain flag and the watcher pause
        value

    """
    # Locking is not used; only cluster-level locks may be owned
    assert not compat.any(lu.glm.is_owned(level)
                          for level in locking.LEVELS
                          if level != locking.LEVEL_CLUSTER)
    assert not (self.do_locking or self.use_locking)

    wanted_data = self.requested_data

    cluster = NotImplemented
    if query.CQ_CONFIG in wanted_data:
      cluster = lu.cfg.GetClusterInfo()

    drain_flag = NotImplemented
    if query.CQ_QUEUE_DRAINED in wanted_data:
      # the queue counts as drained if the drain file exists
      drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)

    watcher_pause = NotImplemented
    if query.CQ_WATCHER_PAUSE in wanted_data:
      watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6287
6288
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the disk information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (success, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if success:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
6326
6327
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes. The work is done in two
  passes: first every (node, device) pair of every disk is assembled
  with the primary flag unset, then the devices living on the primary
  node are assembled again with the primary flag set.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple C{(disks_ok, device_info)}; C{disks_ok} is False if
      a failure that is not being ignored happened, and C{device_info}
      is a list of (host, instance_visible_name, node_visible_name)
      tuples with the mapping from node devices to instance devices
      (node_visible_name is None if the primary assembly of that disk
      did not succeed)

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  pnode = instance.primary_node
  disks = _ExpandCheckDisks(instance, disks)

  def _PrepareNodeDisk(node_disk, node):
    """Return the disk object to be sent to C{node}.

    When sizes are to be ignored we work on a copy with the size
    unset, so that the disk passed in is not modified.

    """
    if ignore_size:
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return node_disk

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      node_disk = _PrepareNodeDisk(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None unless the assembly on the primary node succeeds
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      if node != pnode:
        continue
      node_disk = _PrepareNodeDisk(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((pnode, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, pnode)

  return disks_ok, device_info
6416
6417
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, failing the operation on errors.

  The C{force} value is handed to L{_AssembleInstanceDisks} as its
  C{ignore_secondaries} argument. If the assembly reports a failure the
  disks are shut down again and the operation is aborted; when C{force}
  is false (but not None) a hint about '--force' is logged first.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: whether errors on secondary nodes are tolerated; None
      suppresses the '--force' hint
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  # roll back whatever was activated before reporting the failure
  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    retry_hint = ("If the message above refers to a secondary node,"
                  " you can retry the operation using '--force'.")
    lu.proc.LogWarning("", hint=retry_hint)
  raise errors.OpExecError("Disk consistency error")
6431
6432
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and prepare the node-level locks.

    The node locks start out empty and are marked to be recalculated
    in replace mode.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance in the configuration and stores it for
    later use in L{Exec}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With the force option the disks are shut down directly, otherwise
    the variant verifying the instance state first is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
6467
6468
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance after a state check.

  The instance state is first verified against C{INSTANCE_DOWN} via
  L{_CheckInstanceState}; only if that check passes,
  L{_ShutdownInstanceDisks} is called for the selected disks.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to act on, passed through unchanged

  """
  failure_msg = "cannot shutdown disks"
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg=failure_msg)
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6478
6479
6480 def _ExpandCheckDisks(instance, disks):
6481   """Return the instance disks selected by the disks list
6482
6483   @type disks: list of L{objects.Disk} or None
6484   @param disks: selected disks
6485   @rtype: list of L{objects.Disk}
6486   @return: selected instance disks to act on
6487
6488   """
6489   if disks is None:
6490     return instance.disks
6491   else:
6492     if not set(disks).issubset(instance.disks):
6493       raise errors.ProgrammerError("Can only act on disks belonging to the"
6494                                    " target instance")
6495     return disks
6496
6497
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning; whether it also affects the return value depends
  on the node:

    - errors on the primary node are ignored only if C{ignore_primary}
      is true
    - errors on other nodes are ignored only if the RPC result is
      flagged as offline

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node won't
      result in an error return from the function
  @rtype: boolean
  @return: False if any failure that is not being ignored happened,
      True otherwise

  """
  all_result = True
  pnode = instance.primary_node
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(pnode):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == pnode and not ignore_primary) or
            (node != pnode and not result.offline)):
          all_result = False
  return all_result
6522
6523
6524 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6525   """Checks if a node has enough free memory.
6526
6527   This function check if a given node has the needed amount of free
6528   memory. In case the node has less memory or we cannot get the
6529   information from the node, this function raise an OpPrereqError
6530   exception.
6531
6532   @type lu: C{LogicalUnit}
6533   @param lu: a logical unit from which we get configuration data
6534   @type node: C{str}
6535   @param node: the node to check
6536   @type reason: C{str}
6537   @param reason: string to use in the error message
6538   @type requested: C{int}
6539   @param requested: the amount of memory in MiB to check for
6540   @type hypervisor_name: C{str}
6541   @param hypervisor_name: the hypervisor to ask for memory stats
6542   @rtype: integer
6543   @return: node current free memory
6544   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6545       we cannot check the node
6546
6547   """
6548   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6549   nodeinfo[node].Raise("Can't get data from node %s" % node,
6550                        prereq=True, ecode=errors.ECODE_ENVIRON)
6551   (_, _, (hv_info, )) = nodeinfo[node].payload
6552
6553   free_mem = hv_info.get("memory_free", None)
6554   if not isinstance(free_mem, int):
6555     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6556                                " was '%s'" % (node, free_mem),
6557                                errors.ECODE_ENVIRON)
6558   if requested > free_mem:
6559     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6560                                " needed %s MiB, available %s MiB" %
6561                                (node, reason, requested, free_mem),
6562                                errors.ECODE_NORES)
6563   return free_mem
6564
6565
6566 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6567   """Checks if nodes have enough free disk space in the all VGs.
6568
6569   This function check if all given nodes have the needed amount of
6570   free disk. In case any node has less disk or we cannot get the
6571   information from the node, this function raise an OpPrereqError
6572   exception.
6573
6574   @type lu: C{LogicalUnit}
6575   @param lu: a logical unit from which we get configuration data
6576   @type nodenames: C{list}
6577   @param nodenames: the list of node names to check
6578   @type req_sizes: C{dict}
6579   @param req_sizes: the hash of vg and corresponding amount of disk in
6580       MiB to check for
6581   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6582       or we cannot check the node
6583
6584   """
6585   for vg, req_size in req_sizes.items():
6586     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6587
6588
6589 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6590   """Checks if nodes have enough free disk space in the specified VG.
6591
6592   This function check if all given nodes have the needed amount of
6593   free disk. In case any node has less disk or we cannot get the
6594   information from the node, this function raise an OpPrereqError
6595   exception.
6596
6597   @type lu: C{LogicalUnit}
6598   @param lu: a logical unit from which we get configuration data
6599   @type nodenames: C{list}
6600   @param nodenames: the list of node names to check
6601   @type vg: C{str}
6602   @param vg: the volume group to check
6603   @type requested: C{int}
6604   @param requested: the amount of disk in MiB to check for
6605   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6606       or we cannot check the node
6607
6608   """
6609   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6610   for node in nodenames:
6611     info = nodeinfo[node]
6612     info.Raise("Cannot get current information from node %s" % node,
6613                prereq=True, ecode=errors.ECODE_ENVIRON)
6614     (_, (vg_info, ), _) = info.payload
6615     vg_free = vg_info.get("vg_free", None)
6616     if not isinstance(vg_free, int):
6617       raise errors.OpPrereqError("Can't compute free disk space on node"
6618                                  " %s for vg %s, result was '%s'" %
6619                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6620     if requested > vg_free:
6621       raise errors.OpPrereqError("Not enough disk space on target node %s"
6622                                  " vg %s: required %d MiB, available %d MiB" %
6623                                  (node, vg, requested, vg_free),
6624                                  errors.ECODE_NORES)
6625
6626
6627 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6628   """Checks if nodes have enough physical CPUs
6629
6630   This function checks if all given nodes have the needed number of
6631   physical CPUs. In case any node has less CPUs or we cannot get the
6632   information from the node, this function raises an OpPrereqError
6633   exception.
6634
6635   @type lu: C{LogicalUnit}
6636   @param lu: a logical unit from which we get configuration data
6637   @type nodenames: C{list}
6638   @param nodenames: the list of node names to check
6639   @type requested: C{int}
6640   @param requested: the minimum acceptable number of physical CPUs
6641   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6642       or we cannot check the node
6643
6644   """
6645   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6646   for node in nodenames:
6647     info = nodeinfo[node]
6648     info.Raise("Cannot get current information from node %s" % node,
6649                prereq=True, ecode=errors.ECODE_ENVIRON)
6650     (_, _, (hv_info, )) = info.payload
6651     num_cpus = hv_info.get("cpu_total", None)
6652     if not isinstance(num_cpus, int):
6653       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6654                                  " on node %s, result was '%s'" %
6655                                  (node, num_cpus), errors.ECODE_ENVIRON)
6656     if requested > num_cpus:
6657       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6658                                  "required" % (node, num_cpus, requested),
6659                                  errors.ECODE_NORES)
6660
6661
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the extra backend parameters, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return

    # both helpers operate on the opcode's dictionary itself
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Declare the instance lock and mark the node resource locks for
    recalculation in replace mode.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node resource locks (primary node only).

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = {"FORCE": self.op.force}
    hooks_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all instance nodes) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any
    hypervisor parameter overrides and, unless an offline primary node
    is being ignored, verifies that the primary node is able to start
    the instance (node online, bridges present, enough free memory if
    the instance is not running already).

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    hvparams = self.op.hvparams
    if hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")

      # nothing on the node can be verified while it is offline
      return

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # an empty payload is what marks the instance as not running;
      # here it is running already, so no memory check is needed
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless requested otherwise the instance is first marked as up in
    the configuration. With an offline primary node nothing else is
    done; otherwise the disks are started and the instance is started
    on its primary node, shutting the disks down again on failure.

    """
    instance = self.instance

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, self.op.force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
6782
6783
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all instance nodes) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, verifies its state
    against C{INSTANCE_ONLINE}, and checks that its primary node is
    online and has the needed bridges.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance asked to do a soft or hard reboot is rebooted
    through the reboot RPC. In all other cases a full cycle is done:
    the instance is shut down (if running) together with its disks,
    then disks and instance are started again. Finally the instance is
    marked as up in the configuration.

    """
    instance = self.instance
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    is_running = bool(remote_info.payload)

    if is_running and reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                      constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if not is_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
6876
6877
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all instance nodes) is used
    for both elements of the returned pair.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and verifies its
    state against C{INSTANCE_ONLINE}. The primary node must be online,
    unless the opcode asks for offline nodes to be ignored.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless requested otherwise the instance is first marked as down in
    the configuration. With an offline primary node nothing else is
    done; otherwise the instance is shut down on its primary node (a
    failure is only logged as a warning) and its disks are shut down.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        # pass the RPC message as a format argument, like the other
        # LogWarning callers in this module, instead of interpolating it
        # into the format string
        self.proc.LogWarning("Could not shutdown instance: %s", msg)

      _ShutdownInstanceDisks(self, instance)
6947
6948
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all instance nodes) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that all its nodes are online and that it actually has disks. A new
    OS type and new OS parameters, if given, are verified as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      # keep the OS currently configured for the instance
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      # the new dict (without defaults)
      self.os_inst = i_osdict

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first. The disks
    are then started, the OS create scripts are run on the primary node
    and the disks are shut down again, whether or not the installation
    succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
7038
7039
7040 class LUInstanceRecreateDisks(LogicalUnit):
7041   """Recreate an instance's missing disks.
7042
7043   """
7044   HPATH = "instance-recreate-disks"
7045   HTYPE = constants.HTYPE_INSTANCE
7046   REQ_BGL = False
7047
7048   _MODIFYABLE = frozenset([
7049     constants.IDISK_SIZE,
7050     constants.IDISK_MODE,
7051     ])
7052
7053   # New or changed disk parameters may have different semantics
7054   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7055     constants.IDISK_ADOPT,
7056
7057     # TODO: Implement support changing VG while recreating
7058     constants.IDISK_VG,
7059     constants.IDISK_METAVG,
7060     ]))
7061
7062   def CheckArguments(self):
7063     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7064       # Normalize and convert deprecated list of disk indices
7065       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7066
7067     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7068     if duplicates:
7069       raise errors.OpPrereqError("Some disks have been specified more than"
7070                                  " once: %s" % utils.CommaJoin(duplicates),
7071                                  errors.ECODE_INVAL)
7072
7073     for (idx, params) in self.op.disks:
7074       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7075       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7076       if unsupported:
7077         raise errors.OpPrereqError("Parameters for disk %s try to change"
7078                                    " unmodifyable parameter(s): %s" %
7079                                    (idx, utils.CommaJoin(unsupported)),
7080                                    errors.ECODE_INVAL)
7081
7082   def ExpandNames(self):
7083     self._ExpandAndLockInstance()
7084     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7085     if self.op.nodes:
7086       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7087       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7088     else:
7089       self.needed_locks[locking.LEVEL_NODE] = []
7090     self.needed_locks[locking.LEVEL_NODE_RES] = []
7091
7092   def DeclareLocks(self, level):
7093     if level == locking.LEVEL_NODE:
7094       # if we replace the nodes, we only need to lock the old primary,
7095       # otherwise we need to lock all nodes for disk re-creation
7096       primary_only = bool(self.op.nodes)
7097       self._LockInstancesNodes(primary_only=primary_only)
7098     elif level == locking.LEVEL_NODE_RES:
7099       # Copy node locks
7100       self.needed_locks[locking.LEVEL_NODE_RES] = \
7101         self.needed_locks[locking.LEVEL_NODE][:]
7102
7103   def BuildHooksEnv(self):
7104     """Build hooks env.
7105
7106     This runs on master, primary and secondary nodes of the instance.
7107
7108     """
7109     return _BuildInstanceHookEnvByObject(self, self.instance)
7110
7111   def BuildHooksNodes(self):
7112     """Build hooks nodes.
7113
7114     """
7115     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7116     return (nl, nl)
7117
7118   def CheckPrereq(self):
7119     """Check prerequisites.
7120
7121     This checks that the instance is in the cluster and is not running.
7122
7123     """
7124     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7125     assert instance is not None, \
7126       "Cannot retrieve locked instance %s" % self.op.instance_name
7127     if self.op.nodes:
7128       if len(self.op.nodes) != len(instance.all_nodes):
7129         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7130                                    " %d replacement nodes were specified" %
7131                                    (instance.name, len(instance.all_nodes),
7132                                     len(self.op.nodes)),
7133                                    errors.ECODE_INVAL)
7134       assert instance.disk_template != constants.DT_DRBD8 or \
7135           len(self.op.nodes) == 2
7136       assert instance.disk_template != constants.DT_PLAIN or \
7137           len(self.op.nodes) == 1
7138       primary_node = self.op.nodes[0]
7139     else:
7140       primary_node = instance.primary_node
7141     _CheckNodeOnline(self, primary_node)
7142
7143     if instance.disk_template == constants.DT_DISKLESS:
7144       raise errors.OpPrereqError("Instance '%s' has no disks" %
7145                                  self.op.instance_name, errors.ECODE_INVAL)
7146
7147     # if we replace nodes *and* the old primary is offline, we don't
7148     # check
7149     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7150     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7151     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7152     if not (self.op.nodes and old_pnode.offline):
7153       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7154                           msg="cannot recreate disks")
7155
7156     if self.op.disks:
7157       self.disks = dict(self.op.disks)
7158     else:
7159       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7160
7161     maxidx = max(self.disks.keys())
7162     if maxidx >= len(instance.disks):
7163       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7164                                  errors.ECODE_INVAL)
7165
7166     if (self.op.nodes and
7167         sorted(self.disks.keys()) != range(len(instance.disks))):
7168       raise errors.OpPrereqError("Can't recreate disks partially and"
7169                                  " change the nodes at the same time",
7170                                  errors.ECODE_INVAL)
7171
7172     self.instance = instance
7173
7174   def Exec(self, feedback_fn):
7175     """Recreate the disks.
7176
7177     """
7178     instance = self.instance
7179
7180     assert (self.owned_locks(locking.LEVEL_NODE) ==
7181             self.owned_locks(locking.LEVEL_NODE_RES))
7182
7183     to_skip = []
7184     mods = [] # keeps track of needed changes
7185
7186     for idx, disk in enumerate(instance.disks):
7187       try:
7188         changes = self.disks[idx]
7189       except KeyError:
7190         # Disk should not be recreated
7191         to_skip.append(idx)
7192         continue
7193
7194       # update secondaries for disks, if needed
7195       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7196         # need to update the nodes and minors
7197         assert len(self.op.nodes) == 2
7198         assert len(disk.logical_id) == 6 # otherwise disk internals
7199                                          # have changed
7200         (_, _, old_port, _, _, old_secret) = disk.logical_id
7201         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7202         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7203                   new_minors[0], new_minors[1], old_secret)
7204         assert len(disk.logical_id) == len(new_id)
7205       else:
7206         new_id = None
7207
7208       mods.append((idx, new_id, changes))
7209
7210     # now that we have passed all asserts above, we can apply the mods
7211     # in a single run (to avoid partial changes)
7212     for idx, new_id, changes in mods:
7213       disk = instance.disks[idx]
7214       if new_id is not None:
7215         assert disk.dev_type == constants.LD_DRBD8
7216         disk.logical_id = new_id
7217       if changes:
7218         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7219                     mode=changes.get(constants.IDISK_MODE, None))
7220
7221     # change primary node, if needed
7222     if self.op.nodes:
7223       instance.primary_node = self.op.nodes[0]
7224       self.LogWarning("Changing the instance's nodes, you will have to"
7225                       " remove any disks left on the older nodes manually")
7226
7227     if self.op.nodes:
7228       self.cfg.Update(instance, feedback_fn)
7229
7230     _CreateDisks(self, instance, to_skip=to_skip)
7231
7232
class LUInstanceRename(LogicalUnit):
  """Logical unit renaming an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Validate the opcode arguments.

    @raise errors.OpPrereqError: if an IP check was requested without a
        name check

    """
    ip_check_only = self.op.ip_check and not self.op.name_check
    if ip_check_only:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance environment is extended with the requested new name.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes the hooks run on.

    The same list (master node followed by all nodes of the instance) is
    returned for both elements of the result.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance exists and is not running, optionally
    resolves and checks the new name, and makes sure the new name is not
    used by another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceState(self, inst, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = inst

    wanted_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=wanted_name)
      if resolved.name != wanted_name:
        self.LogInfo("Resolved given name '%s' to '%s'", wanted_name,
                     resolved.name)
      if not utils.MatchNameComponent(self.op.new_name, [resolved.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (resolved.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      # from here on the resolved name is the one being used
      wanted_name = self.op.new_name = resolved.name
      if (self.op.ip_check and
          netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (resolved.ip, wanted_name),
                                   errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if wanted_name != inst.name and wanted_name in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 wanted_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are renamed first; afterwards
    the file storage directory (for file-based disk templates) is renamed
    and the OS rename script is run with the instance's disks activated.

    @return: the name of the instance after the rename

    """
    inst = self.instance
    old_name = inst.name

    # Remember the old storage directory only if it has to be renamed
    old_storage_dir = None
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # the rename changed the configuration object, so fetch it again
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if old_storage_dir is not None:
      new_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_result = \
        self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                              old_storage_dir,
                                              new_storage_dir)
      rename_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                          " (but the instance has been renamed in Ganeti)" %
                          (inst.primary_node, old_storage_dir,
                           new_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        # a failing rename script only produces a warning
        self.proc.LogWarning("Could not run OS rename script for instance %s"
                             " on node %s (but the instance has been renamed"
                             " in Ganeti): %s" %
                             (inst.name, inst.primary_node, failure))
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7351
7352
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Declare the node locks and mirror them for the node resources.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # node resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance environment is extended with the shutdown timeout.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build the lists of nodes the hooks run on.

    @return: tuple of (master node only, all instance nodes plus master)

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + pre_nodes
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut down and remove the instance.

    A failed shutdown aborts the operation unless the opcode asks for
    failures to be ignored, in which case only a warning is emitted.

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                               self.op.shutdown_timeout)
    shutdown_error = shutdown.fail_msg
    if shutdown_error and not self.op.ignore_failures:
      raise errors.OpExecError("Could not shutdown instance %s on"
                               " node %s: %s" %
                               (inst.name, inst.primary_node, shutdown_error))
    if shutdown_error:
      feedback_fn("Warning: can't shutdown instance: %s" % shutdown_error)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7429
7430
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report a disk removal failure
      that is being ignored
  @param instance: the instance object to remove
  @param ignore_failures: whether a failed disk removal should only be
      reported instead of aborting the removal
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # nobody else may have scheduled an instance lock removal on this LU
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # schedule the instance's lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7451
7452
class LUInstanceQuery(NoHooksLU):
  """Logical unit answering instance queries.

  All the work is delegated to an L{_InstanceQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names for this LU.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_result = self.iq.OldStyleQuery(self)
    return query_result
7472
7473
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The work itself is done by a L{TLMigrateInstance} tasklet that is
  created with C{failover=True}.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{iallocator} and C{target_node} on the LU, using
    C{None} when the opcode does not have the attribute.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # the lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks; the branch above may have stored locking.ALL_SET,
      # which is used as a marker and (presumably) is not a sliceable list,
      # so it has to be passed on as-is instead of being copied with [:]
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the instance environment, the old/new primary and secondary
    nodes, the consistency flag and the shutdown timeout are exported.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # for internally mirrored disks the old primary becomes the secondary
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (master and secondary nodes, the same list plus the
        primary node)

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7561
7562
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The work itself is done by a
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # The node resource level must be registered as well (this used to be a
    # second copy of the LEVEL_NODE lines above); DeclareLocks fills it in
    # from the node locks, in the same way as LUInstanceFailover does
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # the lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks; the branch above may have stored locking.ALL_SET,
      # which is used as a marker and (presumably) is not a sliceable list,
      # so it has to be passed on as-is instead of being copied with [:]
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the instance environment, the migration mode flags and the
    old/new primary and secondary nodes are exported.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # for internally mirrored disks primary and secondary swap roles
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (master and secondary nodes, the same list plus the
        primary node)

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7645
7646
class LUInstanceMove(LogicalUnit):
  """Logical unit moving an instance to another node by copying its data.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    expanded_target = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = expanded_target
    self.needed_locks[locking.LEVEL_NODE] = [expanded_target]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the primary node to the node locks and mirror them.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # node resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    Exports the target node and the shutdown timeout in addition to the
    instance environment.

    """
    hook_env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes the hooks run on.

    The same list (master, primary and target node) is returned for both
    elements of the result.

    """
    hook_nodes = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the target node differs from the primary node, that all
    disks are plain LVs or files, and that the target node is usable
    (online, not drained, VM capable, matching the instance policy, with
    enough memory if the instance is marked up, and with the bridges).

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    target_node = node.name
    self.target_node = target_node

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    be_params = self.cfg.GetClusterInfo().FillBE(instance)

    # only these disk types can be copied over
    for idx, dsk in enumerate(instance.disks):
      simple_layout = dsk.dev_type in (constants.LD_LV, constants.LD_FILE)
      if not simple_layout:
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group = self.cfg.GetNodeGroup(node.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state != constants.ADMINST_UP:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started on the target node, so it must fit
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, be_params[constants.BE_MAXMEM],
                           instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The instance is shut down on its current node, its disks are created
    on the target node and filled by copying the data over (slow), the
    configuration is switched to the target node, the old disks are
    removed and, if the instance is marked up, it is started again.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    shutdown = self.rpc.call_instance_shutdown(source_node, instance,
                                               self.op.shutdown_timeout)
    shutdown_error = shutdown.fail_msg
    if shutdown_error and not self.op.ignore_consistency:
      raise errors.OpExecError("Could not shutdown instance %s on"
                               " node %s: %s" %
                               (instance.name, source_node, shutdown_error))
    if shutdown_error:
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node,
                           shutdown_error)

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always give back the minors before propagating the failure
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    copy_errors = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      assembled = self.rpc.call_blockdev_assemble(target_node,
                                                  (disk, instance),
                                                  instance.name, True, idx)
      if assembled.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, assembled.fail_msg)
        copy_errors.append(assembled.fail_msg)
        break
      dev_path = assembled.payload
      exported = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                               target_node, dev_path,
                                               cluster_name)
      if exported.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, exported.fail_msg)
        copy_errors.append(exported.fail_msg)
        break

    if copy_errors:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(copy_errors),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state != constants.ADMINST_UP:
      return

    self.LogInfo("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    started = self.rpc.call_instance_start(target_node,
                                           (instance, None, None), False)
    start_error = started.fail_msg
    if start_error:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, start_error))
7842
7843
class LUNodeMigrate(LogicalUnit):
  """Logical unit migrating all primary instances away from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Nothing to check.

    """

  def ExpandNames(self):
    """Expand the node name and request a shared lock on the node.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build the hooks environment.

    Exports the node name and the runtime changes flag.

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }
    return hook_env

  def BuildHooksNodes(self):
    """Build the list of nodes the hooks run on (the master node only).

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Nothing to check.

    """

  def Exec(self, feedback_fn):
    """Submit one migration job per primary instance of the node.

    @return: a L{ResultWithJobs} holding one single-opcode job for each
        instance

    """
    allow_runtime_changes = self.op.allow_runtime_changes

    # one job, consisting of a single opcode, per instance
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = \
        opcodes.OpInstanceMigrate(instance_name=inst.name,
                                  mode=self.op.mode,
                                  live=self.op.live,
                                  iallocator=self.op.iallocator,
                                  target_node=self.op.target_node,
                                  allow_runtime_changes=allow_runtime_changes,
                                  ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7907
7908
7909 class TLMigrateInstance(Tasklet):
7910   """Tasklet class for instance migration.
7911
7912   @type live: boolean
7913   @ivar live: whether the migration will be done live or non-live;
7914       this variable is initialized only after CheckPrereq has run
7915   @type cleanup: boolean
7916   @ivar cleanup: Whether we clean up from a failed migration
7917   @type iallocator: string
7918   @ivar iallocator: The iallocator used to determine target_node
7919   @type target_node: string
7920   @ivar target_node: If given, the target_node to reallocate the instance to
7921   @type failover: boolean
7922   @ivar failover: Whether operation results in failover or migration
7923   @type fallback: boolean
7924   @ivar fallback: Whether fallback to failover is allowed if migration not
7925                   possible
7926   @type ignore_consistency: boolean
7927   @ivar ignore_consistency: Whether we should ignore consistency between source
7928                             and target node
7929   @type shutdown_timeout: int
7930   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7931   @type ignore_ipolicy: bool
7932   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7933
7934   """
7935
7936   # Constants
7937   _MIGRATION_POLL_INTERVAL = 1      # seconds
7938   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7939
7940   def __init__(self, lu, instance_name, cleanup=False,
7941                failover=False, fallback=False,
7942                ignore_consistency=False,
7943                allow_runtime_changes=True,
7944                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7945                ignore_ipolicy=False):
7946     """Initializes this class.
7947
7948     """
7949     Tasklet.__init__(self, lu)
7950
7951     # Parameters
7952     self.instance_name = instance_name
7953     self.cleanup = cleanup
7954     self.live = False # will be overridden later
7955     self.failover = failover
7956     self.fallback = fallback
7957     self.ignore_consistency = ignore_consistency
7958     self.shutdown_timeout = shutdown_timeout
7959     self.ignore_ipolicy = ignore_ipolicy
7960     self.allow_runtime_changes = allow_runtime_changes
7961
7962   def CheckPrereq(self):
7963     """Check prerequisites.
7964
7965     This checks that the instance is in the cluster.
7966
7967     """
7968     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7969     instance = self.cfg.GetInstanceInfo(instance_name)
7970     assert instance is not None
7971     self.instance = instance
7972     cluster = self.cfg.GetClusterInfo()
7973
7974     if (not self.cleanup and
7975         not instance.admin_state == constants.ADMINST_UP and
7976         not self.failover and self.fallback):
7977       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7978                       " switching to failover")
7979       self.failover = True
7980
7981     if instance.disk_template not in constants.DTS_MIRRORED:
7982       if self.failover:
7983         text = "failovers"
7984       else:
7985         text = "migrations"
7986       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7987                                  " %s" % (instance.disk_template, text),
7988                                  errors.ECODE_STATE)
7989
7990     if instance.disk_template in constants.DTS_EXT_MIRROR:
7991       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7992
7993       if self.lu.op.iallocator:
7994         self._RunAllocator()
7995       else:
7996         # We set set self.target_node as it is required by
7997         # BuildHooksEnv
7998         self.target_node = self.lu.op.target_node
7999
8000       # Check that the target node is correct in terms of instance policy
8001       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8002       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8003       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8004       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8005                               ignore=self.ignore_ipolicy)
8006
8007       # self.target_node is already populated, either directly or by the
8008       # iallocator run
8009       target_node = self.target_node
8010       if self.target_node == instance.primary_node:
8011         raise errors.OpPrereqError("Cannot migrate instance %s"
8012                                    " to its primary (%s)" %
8013                                    (instance.name, instance.primary_node))
8014
8015       if len(self.lu.tasklets) == 1:
8016         # It is safe to release locks only when we're the only tasklet
8017         # in the LU
8018         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8019                       keep=[instance.primary_node, self.target_node])
8020
8021     else:
8022       secondary_nodes = instance.secondary_nodes
8023       if not secondary_nodes:
8024         raise errors.ConfigurationError("No secondary node but using"
8025                                         " %s disk template" %
8026                                         instance.disk_template)
8027       target_node = secondary_nodes[0]
8028       if self.lu.op.iallocator or (self.lu.op.target_node and
8029                                    self.lu.op.target_node != target_node):
8030         if self.failover:
8031           text = "failed over"
8032         else:
8033           text = "migrated"
8034         raise errors.OpPrereqError("Instances with disk template %s cannot"
8035                                    " be %s to arbitrary nodes"
8036                                    " (neither an iallocator nor a target"
8037                                    " node can be passed)" %
8038                                    (instance.disk_template, text),
8039                                    errors.ECODE_INVAL)
8040       nodeinfo = self.cfg.GetNodeInfo(target_node)
8041       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8042       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8043       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8044                               ignore=self.ignore_ipolicy)
8045
8046     i_be = cluster.FillBE(instance)
8047
8048     # check memory requirements on the secondary node
8049     if (not self.cleanup and
8050          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8051       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8052                                                "migrating instance %s" %
8053                                                instance.name,
8054                                                i_be[constants.BE_MINMEM],
8055                                                instance.hypervisor)
8056     else:
8057       self.lu.LogInfo("Not checking memory on the secondary node as"
8058                       " instance will not be started")
8059
8060     # check if failover must be forced instead of migration
8061     if (not self.cleanup and not self.failover and
8062         i_be[constants.BE_ALWAYS_FAILOVER]):
8063       if self.fallback:
8064         self.lu.LogInfo("Instance configured to always failover; fallback"
8065                         " to failover")
8066         self.failover = True
8067       else:
8068         raise errors.OpPrereqError("This instance has been configured to"
8069                                    " always failover, please allow failover",
8070                                    errors.ECODE_STATE)
8071
8072     # check bridge existance
8073     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8074
8075     if not self.cleanup:
8076       _CheckNodeNotDrained(self.lu, target_node)
8077       if not self.failover:
8078         result = self.rpc.call_instance_migratable(instance.primary_node,
8079                                                    instance)
8080         if result.fail_msg and self.fallback:
8081           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8082                           " failover")
8083           self.failover = True
8084         else:
8085           result.Raise("Can't migrate, please use failover",
8086                        prereq=True, ecode=errors.ECODE_STATE)
8087
8088     assert not (self.failover and self.cleanup)
8089
8090     if not self.failover:
8091       if self.lu.op.live is not None and self.lu.op.mode is not None:
8092         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8093                                    " parameters are accepted",
8094                                    errors.ECODE_INVAL)
8095       if self.lu.op.live is not None:
8096         if self.lu.op.live:
8097           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8098         else:
8099           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8100         # reset the 'live' parameter to None so that repeated
8101         # invocations of CheckPrereq do not raise an exception
8102         self.lu.op.live = None
8103       elif self.lu.op.mode is None:
8104         # read the default value from the hypervisor
8105         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8106         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8107
8108       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8109     else:
8110       # Failover is never live
8111       self.live = False
8112
8113     if not (self.failover or self.cleanup):
8114       remote_info = self.rpc.call_instance_info(instance.primary_node,
8115                                                 instance.name,
8116                                                 instance.hypervisor)
8117       remote_info.Raise("Error checking instance on node %s" %
8118                         instance.primary_node)
8119       instance_running = bool(remote_info.payload)
8120       if instance_running:
8121         self.current_mem = int(remote_info.payload["memory"])
8122
8123   def _RunAllocator(self):
8124     """Run the allocator based on input opcode.
8125
8126     """
8127     # FIXME: add a self.ignore_ipolicy option
8128     ial = IAllocator(self.cfg, self.rpc,
8129                      mode=constants.IALLOCATOR_MODE_RELOC,
8130                      name=self.instance_name,
8131                      relocate_from=[self.instance.primary_node],
8132                      )
8133
8134     ial.Run(self.lu.op.iallocator)
8135
8136     if not ial.success:
8137       raise errors.OpPrereqError("Can't compute nodes using"
8138                                  " iallocator '%s': %s" %
8139                                  (self.lu.op.iallocator, ial.info),
8140                                  errors.ECODE_NORES)
8141     if len(ial.result) != ial.required_nodes:
8142       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8143                                  " of nodes (%s), required %s" %
8144                                  (self.lu.op.iallocator, len(ial.result),
8145                                   ial.required_nodes), errors.ECODE_FAULT)
8146     self.target_node = ial.result[0]
8147     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8148                  self.instance_name, self.lu.op.iallocator,
8149                  utils.CommaJoin(ial.result))
8150
8151   def _WaitUntilSync(self):
8152     """Poll with custom rpc for disk sync.
8153
8154     This uses our own step-based rpc call.
8155
8156     """
8157     self.feedback_fn("* wait until resync is done")
8158     all_done = False
8159     while not all_done:
8160       all_done = True
8161       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8162                                             self.nodes_ip,
8163                                             (self.instance.disks,
8164                                              self.instance))
8165       min_percent = 100
8166       for node, nres in result.items():
8167         nres.Raise("Cannot resync disks on node %s" % node)
8168         node_done, node_percent = nres.payload
8169         all_done = all_done and node_done
8170         if node_percent is not None:
8171           min_percent = min(min_percent, node_percent)
8172       if not all_done:
8173         if min_percent < 100:
8174           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8175         time.sleep(2)
8176
8177   def _EnsureSecondary(self, node):
8178     """Demote a node to secondary.
8179
8180     """
8181     self.feedback_fn("* switching node %s to secondary mode" % node)
8182
8183     for dev in self.instance.disks:
8184       self.cfg.SetDiskID(dev, node)
8185
8186     result = self.rpc.call_blockdev_close(node, self.instance.name,
8187                                           self.instance.disks)
8188     result.Raise("Cannot change disk to secondary on node %s" % node)
8189
8190   def _GoStandalone(self):
8191     """Disconnect from the network.
8192
8193     """
8194     self.feedback_fn("* changing into standalone mode")
8195     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8196                                                self.instance.disks)
8197     for node, nres in result.items():
8198       nres.Raise("Cannot disconnect disks node %s" % node)
8199
8200   def _GoReconnect(self, multimaster):
8201     """Reconnect to the network.
8202
8203     """
8204     if multimaster:
8205       msg = "dual-master"
8206     else:
8207       msg = "single-master"
8208     self.feedback_fn("* changing disks into %s mode" % msg)
8209     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8210                                            (self.instance.disks, self.instance),
8211                                            self.instance.name, multimaster)
8212     for node, nres in result.items():
8213       nres.Raise("Cannot change disks config on node %s" % node)
8214
8215   def _ExecCleanup(self):
8216     """Try to cleanup after a failed migration.
8217
8218     The cleanup is done by:
8219       - check that the instance is running only on one node
8220         (and update the config if needed)
8221       - change disks on its secondary node to secondary
8222       - wait until disks are fully synchronized
8223       - disconnect from the network
8224       - change disks into single-master mode
8225       - wait again until disks are fully synchronized
8226
8227     """
8228     instance = self.instance
8229     target_node = self.target_node
8230     source_node = self.source_node
8231
8232     # check running on only one node
8233     self.feedback_fn("* checking where the instance actually runs"
8234                      " (if this hangs, the hypervisor might be in"
8235                      " a bad state)")
8236     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8237     for node, result in ins_l.items():
8238       result.Raise("Can't contact node %s" % node)
8239
8240     runningon_source = instance.name in ins_l[source_node].payload
8241     runningon_target = instance.name in ins_l[target_node].payload
8242
8243     if runningon_source and runningon_target:
8244       raise errors.OpExecError("Instance seems to be running on two nodes,"
8245                                " or the hypervisor is confused; you will have"
8246                                " to ensure manually that it runs only on one"
8247                                " and restart this operation")
8248
8249     if not (runningon_source or runningon_target):
8250       raise errors.OpExecError("Instance does not seem to be running at all;"
8251                                " in this case it's safer to repair by"
8252                                " running 'gnt-instance stop' to ensure disk"
8253                                " shutdown, and then restarting it")
8254
8255     if runningon_target:
8256       # the migration has actually succeeded, we need to update the config
8257       self.feedback_fn("* instance running on secondary node (%s),"
8258                        " updating config" % target_node)
8259       instance.primary_node = target_node
8260       self.cfg.Update(instance, self.feedback_fn)
8261       demoted_node = source_node
8262     else:
8263       self.feedback_fn("* instance confirmed to be running on its"
8264                        " primary node (%s)" % source_node)
8265       demoted_node = target_node
8266
8267     if instance.disk_template in constants.DTS_INT_MIRROR:
8268       self._EnsureSecondary(demoted_node)
8269       try:
8270         self._WaitUntilSync()
8271       except errors.OpExecError:
8272         # we ignore here errors, since if the device is standalone, it
8273         # won't be able to sync
8274         pass
8275       self._GoStandalone()
8276       self._GoReconnect(False)
8277       self._WaitUntilSync()
8278
8279     self.feedback_fn("* done")
8280
8281   def _RevertDiskStatus(self):
8282     """Try to revert the disk status after a failed migration.
8283
8284     """
8285     target_node = self.target_node
8286     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8287       return
8288
8289     try:
8290       self._EnsureSecondary(target_node)
8291       self._GoStandalone()
8292       self._GoReconnect(False)
8293       self._WaitUntilSync()
8294     except errors.OpExecError, err:
8295       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8296                          " please try to recover the instance manually;"
8297                          " error '%s'" % str(err))
8298
8299   def _AbortMigration(self):
8300     """Call the hypervisor code to abort a started migration.
8301
8302     """
8303     instance = self.instance
8304     target_node = self.target_node
8305     source_node = self.source_node
8306     migration_info = self.migration_info
8307
8308     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8309                                                                  instance,
8310                                                                  migration_info,
8311                                                                  False)
8312     abort_msg = abort_result.fail_msg
8313     if abort_msg:
8314       logging.error("Aborting migration failed on target node %s: %s",
8315                     target_node, abort_msg)
8316       # Don't raise an exception here, as we stil have to try to revert the
8317       # disk status, even if this step failed.
8318
8319     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8320         instance, False, self.live)
8321     abort_msg = abort_result.fail_msg
8322     if abort_msg:
8323       logging.error("Aborting migration failed on source node %s: %s",
8324                     source_node, abort_msg)
8325
8326   def _ExecMigration(self):
8327     """Migrate an instance.
8328
8329     The migrate is done by:
8330       - change the disks into dual-master mode
8331       - wait until disks are fully synchronized again
8332       - migrate the instance
8333       - change disks on the new secondary node (the old primary) to secondary
8334       - wait until disks are fully synchronized
8335       - change disks into single-master mode
8336
8337     """
8338     instance = self.instance
8339     target_node = self.target_node
8340     source_node = self.source_node
8341
8342     # Check for hypervisor version mismatch and warn the user.
8343     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8344                                        None, [self.instance.hypervisor])
8345     for ninfo in nodeinfo.values():
8346       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8347                   ninfo.node)
8348     (_, _, (src_info, )) = nodeinfo[source_node].payload
8349     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8350
8351     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8352         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8353       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8354       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8355       if src_version != dst_version:
8356         self.feedback_fn("* warning: hypervisor version mismatch between"
8357                          " source (%s) and target (%s) node" %
8358                          (src_version, dst_version))
8359
8360     self.feedback_fn("* checking disk consistency between source and target")
8361     for (idx, dev) in enumerate(instance.disks):
8362       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8363         raise errors.OpExecError("Disk %s is degraded or not fully"
8364                                  " synchronized on target node,"
8365                                  " aborting migration" % idx)
8366
8367     if self.current_mem > self.tgt_free_mem:
8368       if not self.allow_runtime_changes:
8369         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8370                                  " free memory to fit instance %s on target"
8371                                  " node %s (have %dMB, need %dMB)" %
8372                                  (instance.name, target_node,
8373                                   self.tgt_free_mem, self.current_mem))
8374       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8375       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8376                                                      instance,
8377                                                      self.tgt_free_mem)
8378       rpcres.Raise("Cannot modify instance runtime memory")
8379
8380     # First get the migration information from the remote node
8381     result = self.rpc.call_migration_info(source_node, instance)
8382     msg = result.fail_msg
8383     if msg:
8384       log_err = ("Failed fetching source migration information from %s: %s" %
8385                  (source_node, msg))
8386       logging.error(log_err)
8387       raise errors.OpExecError(log_err)
8388
8389     self.migration_info = migration_info = result.payload
8390
8391     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8392       # Then switch the disks to master/master mode
8393       self._EnsureSecondary(target_node)
8394       self._GoStandalone()
8395       self._GoReconnect(True)
8396       self._WaitUntilSync()
8397
8398     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8399     result = self.rpc.call_accept_instance(target_node,
8400                                            instance,
8401                                            migration_info,
8402                                            self.nodes_ip[target_node])
8403
8404     msg = result.fail_msg
8405     if msg:
8406       logging.error("Instance pre-migration failed, trying to revert"
8407                     " disk status: %s", msg)
8408       self.feedback_fn("Pre-migration failed, aborting")
8409       self._AbortMigration()
8410       self._RevertDiskStatus()
8411       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8412                                (instance.name, msg))
8413
8414     self.feedback_fn("* migrating instance to %s" % target_node)
8415     result = self.rpc.call_instance_migrate(source_node, instance,
8416                                             self.nodes_ip[target_node],
8417                                             self.live)
8418     msg = result.fail_msg
8419     if msg:
8420       logging.error("Instance migration failed, trying to revert"
8421                     " disk status: %s", msg)
8422       self.feedback_fn("Migration failed, aborting")
8423       self._AbortMigration()
8424       self._RevertDiskStatus()
8425       raise errors.OpExecError("Could not migrate instance %s: %s" %
8426                                (instance.name, msg))
8427
8428     self.feedback_fn("* starting memory transfer")
8429     last_feedback = time.time()
8430     while True:
8431       result = self.rpc.call_instance_get_migration_status(source_node,
8432                                                            instance)
8433       msg = result.fail_msg
8434       ms = result.payload   # MigrationStatus instance
8435       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8436         logging.error("Instance migration failed, trying to revert"
8437                       " disk status: %s", msg)
8438         self.feedback_fn("Migration failed, aborting")
8439         self._AbortMigration()
8440         self._RevertDiskStatus()
8441         raise errors.OpExecError("Could not migrate instance %s: %s" %
8442                                  (instance.name, msg))
8443
8444       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8445         self.feedback_fn("* memory transfer complete")
8446         break
8447
8448       if (utils.TimeoutExpired(last_feedback,
8449                                self._MIGRATION_FEEDBACK_INTERVAL) and
8450           ms.transferred_ram is not None):
8451         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8452         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8453         last_feedback = time.time()
8454
8455       time.sleep(self._MIGRATION_POLL_INTERVAL)
8456
8457     result = self.rpc.call_instance_finalize_migration_src(source_node,
8458                                                            instance,
8459                                                            True,
8460                                                            self.live)
8461     msg = result.fail_msg
8462     if msg:
8463       logging.error("Instance migration succeeded, but finalization failed"
8464                     " on the source node: %s", msg)
8465       raise errors.OpExecError("Could not finalize instance migration: %s" %
8466                                msg)
8467
8468     instance.primary_node = target_node
8469
8470     # distribute new instance config to the other nodes
8471     self.cfg.Update(instance, self.feedback_fn)
8472
8473     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8474                                                            instance,
8475                                                            migration_info,
8476                                                            True)
8477     msg = result.fail_msg
8478     if msg:
8479       logging.error("Instance migration succeeded, but finalization failed"
8480                     " on the target node: %s", msg)
8481       raise errors.OpExecError("Could not finalize instance migration: %s" %
8482                                msg)
8483
8484     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8485       self._EnsureSecondary(source_node)
8486       self._WaitUntilSync()
8487       self._GoStandalone()
8488       self._GoReconnect(False)
8489       self._WaitUntilSync()
8490
8491     # If the instance's disk template is `rbd' and there was a successful
8492     # migration, unmap the device from the source node.
8493     if self.instance.disk_template == constants.DT_RBD:
8494       disks = _ExpandCheckDisks(instance, instance.disks)
8495       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8496       for disk in disks:
8497         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8498         msg = result.fail_msg
8499         if msg:
8500           logging.error("Migration was successful, but couldn't unmap the"
8501                         " block device %s on source node %s: %s",
8502                         disk.iv_name, source_node, msg)
8503           logging.error("You need to unmap the device %s manually on %s",
8504                         disk.iv_name, source_node)
8505
8506     self.feedback_fn("* done")
8507
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    The steps are:
      - if the instance is marked up, check the disk consistency on the
        target node; degraded disks are tolerated when the primary node is
        offline or C{self.ignore_consistency} is set
      - shut down the instance on the source node; a failure is tolerated
        under the same two conditions
      - deactivate the instance's disks
      - make the target node the primary node in the configuration
      - if the instance is marked up, activate its disks and start it on
        the target node

    @raise errors.OpExecError: if a step fails and the failure cannot be
        ignored

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          if primary_node.offline:
            # With the primary node offline, only a notice is given
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # A failed shutdown only aborts the failover if neither the user
      # asked to ignore consistency nor the primary node is offline
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # From now on the target node is the instance's primary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        # Don't leave the disks half-activated
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # The instance is not running, so its disks are shut down again
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8586
8587   def Exec(self, feedback_fn):
8588     """Perform the migration.
8589
8590     """
8591     self.feedback_fn = feedback_fn
8592     self.source_node = self.instance.primary_node
8593
8594     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8595     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8596       self.target_node = self.instance.secondary_nodes[0]
8597       # Otherwise self.target_node has been populated either
8598       # directly, or through an iallocator.
8599
8600     self.all_nodes = [self.source_node, self.target_node]
8601     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8602                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8603
8604     if self.failover:
8605       feedback_fn("Failover instance %s" % self.instance.name)
8606       self._ExecFailover()
8607     else:
8608       feedback_fn("Migrating instance %s" % self.instance.name)
8609
8610       if self.cleanup:
8611         return self._ExecCleanup()
8612       else:
8613         return self._ExecMigration()
8614
8615
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Annotate a root device and create its device tree on a node.

  The device is passed through L{_AnnotateDiskParams} first; everything
  else is handed over unchanged to L{_CreateBlockDevInner}, whose result
  is returned.

  """
  (annotated_root,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, annotated_root,
                              force_create, info, force_open)
8626
8627
8628 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8629                          info, force_open):
8630   """Create a tree of block devices on a given node.
8631
8632   If this device type has to be created on secondaries, create it and
8633   all its children.
8634
8635   If not, just recurse to children keeping the same 'force' value.
8636
8637   @attention: The device has to be annotated already.
8638
8639   @param lu: the lu on whose behalf we execute
8640   @param node: the node on which to create the device
8641   @type instance: L{objects.Instance}
8642   @param instance: the instance which owns the device
8643   @type device: L{objects.Disk}
8644   @param device: the device to create
8645   @type force_create: boolean
8646   @param force_create: whether to force creation of this device; this
8647       will be change to True whenever we find a device which has
8648       CreateOnSecondary() attribute
8649   @param info: the extra 'metadata' we should attach to the device
8650       (this will be represented as a LVM tag)
8651   @type force_open: boolean
8652   @param force_open: this parameter will be passes to the
8653       L{backend.BlockdevCreate} function where it specifies
8654       whether we run on primary or not, and it affects both
8655       the child assembly and the device own Open() execution
8656
8657   """
8658   if device.CreateOnSecondary():
8659     force_create = True
8660
8661   if device.children:
8662     for child in device.children:
8663       _CreateBlockDevInner(lu, node, instance, child, force_create,
8664                            info, force_open)
8665
8666   if not force_create:
8667     return
8668
8669   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8670
8671
8672 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8673   """Create a single block device on a given node.
8674
8675   This will not recurse over children of the device, so they must be
8676   created in advance.
8677
8678   @param lu: the lu on whose behalf we execute
8679   @param node: the node on which to create the device
8680   @type instance: L{objects.Instance}
8681   @param instance: the instance which owns the device
8682   @type device: L{objects.Disk}
8683   @param device: the device to create
8684   @param info: the extra 'metadata' we should attach to the device
8685       (this will be represented as a LVM tag)
8686   @type force_open: boolean
8687   @param force_open: this parameter will be passes to the
8688       L{backend.BlockdevCreate} function where it specifies
8689       whether we run on primary or not, and it affects both
8690       the child assembly and the device own Open() execution
8691
8692   """
8693   lu.cfg.SetDiskID(device, node)
8694   result = lu.rpc.call_blockdev_create(node, device, device.size,
8695                                        instance.name, force_open, info)
8696   result.Raise("Can't create block device %s on"
8697                " node %s for instance %s" % (device, node, instance.name))
8698   if device.physical_id is None:
8699     device.physical_id = result.payload
8700
8701
8702 def _GenerateUniqueNames(lu, exts):
8703   """Generate a suitable LV name.
8704
8705   This will generate a logical volume name for the given instance.
8706
8707   """
8708   results = []
8709   for val in exts:
8710     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8711     results.append("%s%s" % (new_id, val))
8712   return results
8713
8714
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration; the
  first child is the data LV (of the requested size), the second one is
  the metadata LV (of size C{DRBD_META_SIZE}).

  @param lu: the lu on whose behalf we execute
  @param primary: first node stored in the DRBD logical ID
  @param secondary: second node stored in the DRBD logical ID
  @param size: the size of the DRBD device and of its data LV
  @param vgnames: two volume group names (data, meta)
  @param names: two LV names (data, meta)
  @param iv_name: the C{iv_name} of the resulting DRBD disk
  @param p_minor: minor stored for the primary node
  @param s_minor: minor stored for the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk, with the two LVs as children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # position 0 is the data volume, position 1 the metadata volume
  lv_children = []
  for (pos, lv_size) in enumerate([size, DRBD_META_SIZE]):
    lv_children.append(objects.Disk(dev_type=constants.LD_LV, size=lv_size,
                                    logical_id=(vgnames[pos], names[pos]),
                                    params={}))

  drbd_logical_id = (primary, secondary, port, p_minor, s_minor,
                     shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_logical_id,
                      children=lv_children,
                      iv_name=iv_name, params={})
8737
8738
# Name component placed in front of the ".disk<N>" suffix when
# _GenerateDiskTemplate generates unique volume names; for templates
# missing from this mapping no names are generated at all
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }
8743
8744
# Logical device type used by _GenerateDiskTemplate for the disks of
# each template it handles through this table (diskless and DRBD8 are
# handled separately there and hence not listed)
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8752
8753
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Build the disk objects describing a whole disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate
  @param instance_name: instance name, used when allocating DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; exactly one entry is
      required for DRBD8, none is accepted for the other templates
  @param disk_info: list of disk specifications (dicts keyed by the
      C{constants.IDISK_*} names)
  @param file_storage_dir: directory used in the logical ID of
      file-based disks
  @param file_driver: driver used in the logical ID of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function called with a message for every generated
      disk (not used for diskless and DRBD8)
  @param full_disk_params: disk parameters handed to
      L{objects.Disk.ComputeLDParams} for DRBD8
  @rtype: list
  @return: list of L{objects.Disk}, one per entry of C{disk_info}
  @raise errors.ProgrammerError: for an unknown template or an
      unexpected number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    ld_params = objects.Disk.ComputeLDParams(template_name, full_disk_params)
    (drbd_params, _, _) = ld_params
    default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    # two LV names (data, meta) and two minors per disk, stored pairwise
    lv_suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, lv_suffixes):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    drbd_disks = []
    for (idx, disk) in enumerate(disk_info):
      first = idx * 2
      vgs = [disk.get(constants.IDISK_VG, vgname),
             disk.get(constants.IDISK_METAVG, default_metavg)]
      branch = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                    disk[constants.IDISK_SIZE], vgs,
                                    lv_names[first:first + 2],
                                    "disk/%d" % (idx + base_index),
                                    minors[first], minors[first + 1])
      branch.mode = disk[constants.IDISK_MODE]
      drbd_disks.append(branch)
    return drbd_disks

  # everything below handles the non-mirrored templates
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + i)
                                      for i in range(disk_count)])
  else:
    names = None

  # Builders of the logical ID; all take (position in disk_info,
  # absolute disk index, disk specification)
  def _LvLogicalId(idx, _, disk):
    return (disk.get(constants.IDISK_VG, vgname), names[idx])

  def _FileLogicalId(_, disk_index, disk):
    return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))

  def _BlockdevLogicalId(idx, disk_index, disk):
    return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])

  def _RbdLogicalId(idx, _, disk):
    return ("rbd", names[idx])

  logical_id_builders = {
    constants.DT_PLAIN: _LvLogicalId,
    constants.DT_FILE: _FileLogicalId,
    constants.DT_SHARED_FILE: _FileLogicalId,
    constants.DT_BLOCK: _BlockdevLogicalId,
    constants.DT_RBD: _RbdLogicalId,
    }
  logical_id_fn = logical_id_builders.get(template_name)
  if logical_id_fn is None:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  disks = []
  for (idx, disk) in enumerate(disk_info):
    disk_index = idx + base_index
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    logical_id = logical_id_fn(idx, disk_index, disk)
    disks.append(objects.Disk(dev_type=dev_type, size=size,
                              logical_id=logical_id,
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params={}))

  return disks
8846
8847
8848 def _GetInstanceInfoText(instance):
8849   """Compute that text that should be added to the disk's metadata.
8850
8851   """
8852   return "originstname+%s" % instance.name
8853
8854
8855 def _CalcEta(time_taken, written, total_size):
8856   """Calculates the ETA based on size written and total size.
8857
8858   @param time_taken: The time taken so far
8859   @param written: amount written so far
8860   @param total_size: The total size of data to be written
8861   @return: The remaining time in seconds
8862
8863   """
8864   avg_time = time_taken / float(written)
8865   return (total_size - written) * avg_time
8866
8867
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Disk synchronisation is paused on the primary node, every disk is
  wiped chunk by chunk, and synchronisation is resumed again, no matter
  whether the wipe succeeded.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: nothing; failures of the pause or of a wipe request are
      reported through the C{Raise} method of the RPC result

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (instance.disks, instance),
                                                  True)
  # The per-disk payload is only meaningful if the call itself worked, so
  # report a failed call properly instead of iterating over its payload
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for small disks the truncation can yield zero, which would
      # keep the loop below from ever advancing, hence the lower bound
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (instance.disks, instance),
                                                    False)

    # Only warn here: raising from the cleanup path would hide an error
    # coming from the wipe itself
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
8940
8941
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first; afterwards every
  disk is created on all involved nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: nothing; a failed directory creation is reported through the
      C{Raise} method of the RPC result

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on node %s" %
                     (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if not (to_skip and idx in to_skip):
      logging.info("Creating disk %s for instance '%s'", idx, instance.name)
      for node in all_nodes:
        # creation and opening are both forced on the primary node only
        on_primary = (node == pnode)
        _CreateBlockDev(lu, node, instance, device, on_primary, info,
                        on_primary)
8984
8985
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether the DRBD ports should be returned to
      the pool even if some of the devices could not be removed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  # NOTE(review): _CreateDisks creates the directory for every template in
  # constants.DTS_FILEBASED, while only DT_FILE is cleaned up here; confirm
  # whether the other file-based templates should be handled as well
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the request was actually sent to, which differs
      # from the primary node when a target node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9043
9044
9045 def _ComputeDiskSizePerVG(disk_template, disks):
9046   """Compute disk size requirements in the volume group
9047
9048   """
9049   def _compute(disks, payload):
9050     """Universal algorithm.
9051
9052     """
9053     vgs = {}
9054     for disk in disks:
9055       vgs[disk[constants.IDISK_VG]] = \
9056         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9057
9058     return vgs
9059
9060   # Required free disk space as a function of disk and swap space
9061   req_size_dict = {
9062     constants.DT_DISKLESS: {},
9063     constants.DT_PLAIN: _compute(disks, 0),
9064     # 128 MB are added for drbd metadata for each disk
9065     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9066     constants.DT_FILE: {},
9067     constants.DT_SHARED_FILE: {},
9068   }
9069
9070   if disk_template not in req_size_dict:
9071     raise errors.ProgrammerError("Disk template '%s' size requirement"
9072                                  " is unknown" % disk_template)
9073
9074   return req_size_dict[disk_template]
9075
9076
9077 def _ComputeDiskSize(disk_template, disks):
9078   """Compute disk size requirements in the volume group
9079
9080   """
9081   # Required free disk space as a function of disk and swap space
9082   req_size_dict = {
9083     constants.DT_DISKLESS: None,
9084     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9085     # 128 MB are added for drbd metadata for each disk
9086     constants.DT_DRBD8:
9087       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9088     constants.DT_FILE: None,
9089     constants.DT_SHARED_FILE: 0,
9090     constants.DT_BLOCK: 0,
9091     constants.DT_RBD: 0,
9092   }
9093
9094   if disk_template not in req_size_dict:
9095     raise errors.ProgrammerError("Disk template '%s' size requirement"
9096                                  " is unknown" % disk_template)
9097
9098   return req_size_dict[disk_template]
9099
9100
9101 def _FilterVmNodes(lu, nodenames):
9102   """Filters out non-vm_capable nodes from a list.
9103
9104   @type lu: L{LogicalUnit}
9105   @param lu: the logical unit for which we check
9106   @type nodenames: list
9107   @param nodenames: the list of nodes on which we should check
9108   @rtype: list
9109   @return: the list of vm-capable nodes
9110
9111   """
9112   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9113   return [name for name in nodenames if name not in vm_nodes]
9114
9115
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  The given parameters are laid over the cluster-level parameters of
  the hypervisor and the result is validated on all vm-capable nodes of
  the list; offline nodes are skipped. This is shared by instance
  creation and instance modification.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  hvfull = objects.FillDict(cluster_defaults, hvparams)

  per_node = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname, hvfull)
  for node in vm_nodes:
    node_result = per_node[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9144
9145
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  The validation is requested from all vm-capable nodes of the list; an
  empty answer from a node is only logged.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should validate against
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)
  for (node, node_result) in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9174
9175
9176 class LUInstanceCreate(LogicalUnit):
9177   """Create an instance.
9178
9179   """
9180   HPATH = "instance-add"
9181   HTYPE = constants.HTYPE_INSTANCE
9182   REQ_BGL = False
9183
9184   def CheckArguments(self):
9185     """Check arguments.
9186
9187     """
9188     # do not require name_check to ease forward/backward compatibility
9189     # for tools
9190     if self.op.no_install and self.op.start:
9191       self.LogInfo("No-installation mode selected, disabling startup")
9192       self.op.start = False
9193     # validate/normalize the instance name
9194     self.op.instance_name = \
9195       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9196
9197     if self.op.ip_check and not self.op.name_check:
9198       # TODO: make the ip check more flexible and not depend on the name check
9199       raise errors.OpPrereqError("Cannot do IP address check without a name"
9200                                  " check", errors.ECODE_INVAL)
9201
9202     # check nics' parameter names
9203     for nic in self.op.nics:
9204       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9205
9206     # check disks. parameter names and consistent adopt/no-adopt strategy
9207     has_adopt = has_no_adopt = False
9208     for disk in self.op.disks:
9209       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9210       if constants.IDISK_ADOPT in disk:
9211         has_adopt = True
9212       else:
9213         has_no_adopt = True
9214     if has_adopt and has_no_adopt:
9215       raise errors.OpPrereqError("Either all disks are adopted or none is",
9216                                  errors.ECODE_INVAL)
9217     if has_adopt:
9218       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9219         raise errors.OpPrereqError("Disk adoption is not supported for the"
9220                                    " '%s' disk template" %
9221                                    self.op.disk_template,
9222                                    errors.ECODE_INVAL)
9223       if self.op.iallocator is not None:
9224         raise errors.OpPrereqError("Disk adoption not allowed with an"
9225                                    " iallocator script", errors.ECODE_INVAL)
9226       if self.op.mode == constants.INSTANCE_IMPORT:
9227         raise errors.OpPrereqError("Disk adoption not allowed for"
9228                                    " instance import", errors.ECODE_INVAL)
9229     else:
9230       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9231         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9232                                    " but no 'adopt' parameter given" %
9233                                    self.op.disk_template,
9234                                    errors.ECODE_INVAL)
9235
9236     self.adopt_disks = has_adopt
9237
9238     # instance name verification
9239     if self.op.name_check:
9240       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9241       self.op.instance_name = self.hostname1.name
9242       # used in CheckPrereq for ip ping check
9243       self.check_ip = self.hostname1.ip
9244     else:
9245       self.check_ip = None
9246
9247     # file storage checks
9248     if (self.op.file_driver and
9249         not self.op.file_driver in constants.FILE_DRIVER):
9250       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9251                                  self.op.file_driver, errors.ECODE_INVAL)
9252
9253     if self.op.disk_template == constants.DT_FILE:
9254       opcodes.RequireFileStorage()
9255     elif self.op.disk_template == constants.DT_SHARED_FILE:
9256       opcodes.RequireSharedFileStorage()
9257
9258     ### Node/iallocator related checks
9259     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9260
9261     if self.op.pnode is not None:
9262       if self.op.disk_template in constants.DTS_INT_MIRROR:
9263         if self.op.snode is None:
9264           raise errors.OpPrereqError("The networked disk templates need"
9265                                      " a mirror node", errors.ECODE_INVAL)
9266       elif self.op.snode:
9267         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9268                         " template")
9269         self.op.snode = None
9270
9271     self._cds = _GetClusterDomainSecret()
9272
9273     if self.op.mode == constants.INSTANCE_IMPORT:
9274       # On import force_variant must be True, because if we forced it at
9275       # initial install, our only chance when importing it back is that it
9276       # works again!
9277       self.op.force_variant = True
9278
9279       if self.op.no_install:
9280         self.LogInfo("No-installation mode has no effect during import")
9281
9282     elif self.op.mode == constants.INSTANCE_CREATE:
9283       if self.op.os_type is None:
9284         raise errors.OpPrereqError("No guest OS specified",
9285                                    errors.ECODE_INVAL)
9286       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9287         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9288                                    " installation" % self.op.os_type,
9289                                    errors.ECODE_STATE)
9290       if self.op.disk_template is None:
9291         raise errors.OpPrereqError("No disk template specified",
9292                                    errors.ECODE_INVAL)
9293
9294     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9295       # Check handshake to ensure both clusters have the same domain secret
9296       src_handshake = self.op.source_handshake
9297       if not src_handshake:
9298         raise errors.OpPrereqError("Missing source handshake",
9299                                    errors.ECODE_INVAL)
9300
9301       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9302                                                            src_handshake)
9303       if errmsg:
9304         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9305                                    errors.ECODE_INVAL)
9306
9307       # Load and check source CA
9308       self.source_x509_ca_pem = self.op.source_x509_ca
9309       if not self.source_x509_ca_pem:
9310         raise errors.OpPrereqError("Missing source X509 CA",
9311                                    errors.ECODE_INVAL)
9312
9313       try:
9314         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9315                                                     self._cds)
9316       except OpenSSL.crypto.Error, err:
9317         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9318                                    (err, ), errors.ECODE_INVAL)
9319
9320       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9321       if errcode is not None:
9322         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9323                                    errors.ECODE_INVAL)
9324
9325       self.source_x509_ca = cert
9326
9327       src_instance_name = self.op.source_instance_name
9328       if not src_instance_name:
9329         raise errors.OpPrereqError("Missing source instance name",
9330                                    errors.ECODE_INVAL)
9331
9332       self.source_instance_name = \
9333           netutils.GetHostname(name=src_instance_name).name
9334
9335     else:
9336       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9337                                  self.op.mode, errors.ECODE_INVAL)
9338
9339   def ExpandNames(self):
9340     """ExpandNames for CreateInstance.
9341
9342     Figure out the right locks for instance creation.
9343
9344     """
9345     self.needed_locks = {}
9346
9347     instance_name = self.op.instance_name
9348     # this is just a preventive check, but someone might still add this
9349     # instance in the meantime, and creation will fail at lock-add time
9350     if instance_name in self.cfg.GetInstanceList():
9351       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9352                                  instance_name, errors.ECODE_EXISTS)
9353
9354     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9355
9356     if self.op.iallocator:
9357       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9358       # specifying a group on instance creation and then selecting nodes from
9359       # that group
9360       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9361       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9362     else:
9363       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9364       nodelist = [self.op.pnode]
9365       if self.op.snode is not None:
9366         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9367         nodelist.append(self.op.snode)
9368       self.needed_locks[locking.LEVEL_NODE] = nodelist
9369       # Lock resources of instance's primary and secondary nodes (copy to
9370       # prevent accidential modification)
9371       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9372
9373     # in case of import lock the source node too
9374     if self.op.mode == constants.INSTANCE_IMPORT:
9375       src_node = self.op.src_node
9376       src_path = self.op.src_path
9377
9378       if src_path is None:
9379         self.op.src_path = src_path = self.op.instance_name
9380
9381       if src_node is None:
9382         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9383         self.op.src_node = None
9384         if os.path.isabs(src_path):
9385           raise errors.OpPrereqError("Importing an instance from a path"
9386                                      " requires a source node option",
9387                                      errors.ECODE_INVAL)
9388       else:
9389         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9390         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9391           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9392         if not os.path.isabs(src_path):
9393           self.op.src_path = src_path = \
9394             utils.PathJoin(constants.EXPORT_DIR, src_path)
9395
9396   def _RunAllocator(self):
9397     """Run the allocator based on input opcode.
9398
9399     """
9400     nics = [n.ToDict() for n in self.nics]
9401     ial = IAllocator(self.cfg, self.rpc,
9402                      mode=constants.IALLOCATOR_MODE_ALLOC,
9403                      name=self.op.instance_name,
9404                      disk_template=self.op.disk_template,
9405                      tags=self.op.tags,
9406                      os=self.op.os_type,
9407                      vcpus=self.be_full[constants.BE_VCPUS],
9408                      memory=self.be_full[constants.BE_MAXMEM],
9409                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9410                      disks=self.disks,
9411                      nics=nics,
9412                      hypervisor=self.op.hypervisor,
9413                      )
9414
9415     ial.Run(self.op.iallocator)
9416
9417     if not ial.success:
9418       raise errors.OpPrereqError("Can't compute nodes using"
9419                                  " iallocator '%s': %s" %
9420                                  (self.op.iallocator, ial.info),
9421                                  errors.ECODE_NORES)
9422     if len(ial.result) != ial.required_nodes:
9423       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9424                                  " of nodes (%s), required %s" %
9425                                  (self.op.iallocator, len(ial.result),
9426                                   ial.required_nodes), errors.ECODE_FAULT)
9427     self.op.pnode = ial.result[0]
9428     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9429                  self.op.instance_name, self.op.iallocator,
9430                  utils.CommaJoin(ial.result))
9431     if ial.required_nodes == 2:
9432       self.op.snode = ial.result[1]
9433
9434   def BuildHooksEnv(self):
9435     """Build hooks env.
9436
9437     This runs on master, primary and secondary nodes of the instance.
9438
9439     """
9440     env = {
9441       "ADD_MODE": self.op.mode,
9442       }
9443     if self.op.mode == constants.INSTANCE_IMPORT:
9444       env["SRC_NODE"] = self.op.src_node
9445       env["SRC_PATH"] = self.op.src_path
9446       env["SRC_IMAGES"] = self.src_images
9447
9448     env.update(_BuildInstanceHookEnv(
9449       name=self.op.instance_name,
9450       primary_node=self.op.pnode,
9451       secondary_nodes=self.secondaries,
9452       status=self.op.start,
9453       os_type=self.op.os_type,
9454       minmem=self.be_full[constants.BE_MINMEM],
9455       maxmem=self.be_full[constants.BE_MAXMEM],
9456       vcpus=self.be_full[constants.BE_VCPUS],
9457       nics=_NICListToTuple(self, self.nics),
9458       disk_template=self.op.disk_template,
9459       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9460              for d in self.disks],
9461       bep=self.be_full,
9462       hvp=self.hv_full,
9463       hypervisor_name=self.op.hypervisor,
9464       tags=self.op.tags,
9465     ))
9466
9467     return env
9468
9469   def BuildHooksNodes(self):
9470     """Build hooks nodes.
9471
9472     """
9473     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9474     return nl, nl
9475
9476   def _ReadExportInfo(self):
9477     """Reads the export information from disk.
9478
9479     It will override the opcode source node and path with the actual
9480     information, if these two were not specified before.
9481
9482     @return: the export information
9483
9484     """
9485     assert self.op.mode == constants.INSTANCE_IMPORT
9486
9487     src_node = self.op.src_node
9488     src_path = self.op.src_path
9489
9490     if src_node is None:
9491       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9492       exp_list = self.rpc.call_export_list(locked_nodes)
9493       found = False
9494       for node in exp_list:
9495         if exp_list[node].fail_msg:
9496           continue
9497         if src_path in exp_list[node].payload:
9498           found = True
9499           self.op.src_node = src_node = node
9500           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9501                                                        src_path)
9502           break
9503       if not found:
9504         raise errors.OpPrereqError("No export found for relative path %s" %
9505                                     src_path, errors.ECODE_INVAL)
9506
9507     _CheckNodeOnline(self, src_node)
9508     result = self.rpc.call_export_info(src_node, src_path)
9509     result.Raise("No export or invalid export found in dir %s" % src_path)
9510
9511     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9512     if not export_info.has_section(constants.INISECT_EXP):
9513       raise errors.ProgrammerError("Corrupted export config",
9514                                    errors.ECODE_ENVIRON)
9515
9516     ei_version = export_info.get(constants.INISECT_EXP, "version")
9517     if (int(ei_version) != constants.EXPORT_VERSION):
9518       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9519                                  (ei_version, constants.EXPORT_VERSION),
9520                                  errors.ECODE_ENVIRON)
9521     return export_info
9522
9523   def _ReadExportParams(self, einfo):
9524     """Use export parameters as defaults.
9525
9526     In case the opcode doesn't specify (as in override) some instance
9527     parameters, then try to use them from the export information, if
9528     that declares them.
9529
9530     """
9531     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9532
9533     if self.op.disk_template is None:
9534       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9535         self.op.disk_template = einfo.get(constants.INISECT_INS,
9536                                           "disk_template")
9537         if self.op.disk_template not in constants.DISK_TEMPLATES:
9538           raise errors.OpPrereqError("Disk template specified in configuration"
9539                                      " file is not one of the allowed values:"
9540                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9541       else:
9542         raise errors.OpPrereqError("No disk template specified and the export"
9543                                    " is missing the disk_template information",
9544                                    errors.ECODE_INVAL)
9545
9546     if not self.op.disks:
9547       disks = []
9548       # TODO: import the disk iv_name too
9549       for idx in range(constants.MAX_DISKS):
9550         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9551           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9552           disks.append({constants.IDISK_SIZE: disk_sz})
9553       self.op.disks = disks
9554       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9555         raise errors.OpPrereqError("No disk info specified and the export"
9556                                    " is missing the disk information",
9557                                    errors.ECODE_INVAL)
9558
9559     if not self.op.nics:
9560       nics = []
9561       for idx in range(constants.MAX_NICS):
9562         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9563           ndict = {}
9564           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9565             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9566             ndict[name] = v
9567           nics.append(ndict)
9568         else:
9569           break
9570       self.op.nics = nics
9571
9572     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9573       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9574
9575     if (self.op.hypervisor is None and
9576         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9577       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9578
9579     if einfo.has_section(constants.INISECT_HYP):
9580       # use the export parameters but do not override the ones
9581       # specified by the user
9582       for name, value in einfo.items(constants.INISECT_HYP):
9583         if name not in self.op.hvparams:
9584           self.op.hvparams[name] = value
9585
9586     if einfo.has_section(constants.INISECT_BEP):
9587       # use the parameters, without overriding
9588       for name, value in einfo.items(constants.INISECT_BEP):
9589         if name not in self.op.beparams:
9590           self.op.beparams[name] = value
9591         # Compatibility for the old "memory" be param
9592         if name == constants.BE_MEMORY:
9593           if constants.BE_MAXMEM not in self.op.beparams:
9594             self.op.beparams[constants.BE_MAXMEM] = value
9595           if constants.BE_MINMEM not in self.op.beparams:
9596             self.op.beparams[constants.BE_MINMEM] = value
9597     else:
9598       # try to read the parameters old style, from the main section
9599       for name in constants.BES_PARAMETERS:
9600         if (name not in self.op.beparams and
9601             einfo.has_option(constants.INISECT_INS, name)):
9602           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9603
9604     if einfo.has_section(constants.INISECT_OSP):
9605       # use the parameters, without overriding
9606       for name, value in einfo.items(constants.INISECT_OSP):
9607         if name not in self.op.osparams:
9608           self.op.osparams[name] = value
9609
9610   def _RevertToDefaults(self, cluster):
9611     """Revert the instance parameters to the default values.
9612
9613     """
9614     # hvparams
9615     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9616     for name in self.op.hvparams.keys():
9617       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9618         del self.op.hvparams[name]
9619     # beparams
9620     be_defs = cluster.SimpleFillBE({})
9621     for name in self.op.beparams.keys():
9622       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9623         del self.op.beparams[name]
9624     # nic params
9625     nic_defs = cluster.SimpleFillNIC({})
9626     for nic in self.op.nics:
9627       for name in constants.NICS_PARAMETERS:
9628         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9629           del nic[name]
9630     # osparams
9631     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9632     for name in self.op.osparams.keys():
9633       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9634         del self.op.osparams[name]
9635
9636   def _CalculateFileStorageDir(self):
9637     """Calculate final instance file storage dir.
9638
9639     """
9640     # file storage dir calculation/check
9641     self.instance_file_storage_dir = None
9642     if self.op.disk_template in constants.DTS_FILEBASED:
9643       # build the full file storage dir path
9644       joinargs = []
9645
9646       if self.op.disk_template == constants.DT_SHARED_FILE:
9647         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9648       else:
9649         get_fsd_fn = self.cfg.GetFileStorageDir
9650
9651       cfg_storagedir = get_fsd_fn()
9652       if not cfg_storagedir:
9653         raise errors.OpPrereqError("Cluster file storage dir not defined")
9654       joinargs.append(cfg_storagedir)
9655
9656       if self.op.file_storage_dir is not None:
9657         joinargs.append(self.op.file_storage_dir)
9658
9659       joinargs.append(self.op.instance_name)
9660
9661       # pylint: disable=W0142
9662       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9663
9664   def CheckPrereq(self): # pylint: disable=R0914
9665     """Check prerequisites.
9666
9667     """
9668     self._CalculateFileStorageDir()
9669
9670     if self.op.mode == constants.INSTANCE_IMPORT:
9671       export_info = self._ReadExportInfo()
9672       self._ReadExportParams(export_info)
9673       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9674     else:
9675       self._old_instance_name = None
9676
9677     if (not self.cfg.GetVGName() and
9678         self.op.disk_template not in constants.DTS_NOT_LVM):
9679       raise errors.OpPrereqError("Cluster does not support lvm-based"
9680                                  " instances", errors.ECODE_STATE)
9681
9682     if (self.op.hypervisor is None or
9683         self.op.hypervisor == constants.VALUE_AUTO):
9684       self.op.hypervisor = self.cfg.GetHypervisorType()
9685
9686     cluster = self.cfg.GetClusterInfo()
9687     enabled_hvs = cluster.enabled_hypervisors
9688     if self.op.hypervisor not in enabled_hvs:
9689       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9690                                  " cluster (%s)" % (self.op.hypervisor,
9691                                   ",".join(enabled_hvs)),
9692                                  errors.ECODE_STATE)
9693
9694     # Check tag validity
9695     for tag in self.op.tags:
9696       objects.TaggableObject.ValidateTag(tag)
9697
9698     # check hypervisor parameter syntax (locally)
9699     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9700     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9701                                       self.op.hvparams)
9702     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9703     hv_type.CheckParameterSyntax(filled_hvp)
9704     self.hv_full = filled_hvp
9705     # check that we don't specify global parameters on an instance
9706     _CheckGlobalHvParams(self.op.hvparams)
9707
9708     # fill and remember the beparams dict
9709     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9710     for param, value in self.op.beparams.iteritems():
9711       if value == constants.VALUE_AUTO:
9712         self.op.beparams[param] = default_beparams[param]
9713     objects.UpgradeBeParams(self.op.beparams)
9714     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9715     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9716
9717     # build os parameters
9718     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9719
9720     # now that hvp/bep are in final format, let's reset to defaults,
9721     # if told to do so
9722     if self.op.identify_defaults:
9723       self._RevertToDefaults(cluster)
9724
9725     # NIC buildup
9726     self.nics = []
9727     for idx, nic in enumerate(self.op.nics):
9728       nic_mode_req = nic.get(constants.INIC_MODE, None)
9729       nic_mode = nic_mode_req
9730       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9731         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9732
9733       # in routed mode, for the first nic, the default ip is 'auto'
9734       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9735         default_ip_mode = constants.VALUE_AUTO
9736       else:
9737         default_ip_mode = constants.VALUE_NONE
9738
9739       # ip validity checks
9740       ip = nic.get(constants.INIC_IP, default_ip_mode)
9741       if ip is None or ip.lower() == constants.VALUE_NONE:
9742         nic_ip = None
9743       elif ip.lower() == constants.VALUE_AUTO:
9744         if not self.op.name_check:
9745           raise errors.OpPrereqError("IP address set to auto but name checks"
9746                                      " have been skipped",
9747                                      errors.ECODE_INVAL)
9748         nic_ip = self.hostname1.ip
9749       else:
9750         if not netutils.IPAddress.IsValid(ip):
9751           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9752                                      errors.ECODE_INVAL)
9753         nic_ip = ip
9754
9755       # TODO: check the ip address for uniqueness
9756       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9757         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9758                                    errors.ECODE_INVAL)
9759
9760       # MAC address verification
9761       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9762       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9763         mac = utils.NormalizeAndValidateMac(mac)
9764
9765         try:
9766           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9767         except errors.ReservationError:
9768           raise errors.OpPrereqError("MAC address %s already in use"
9769                                      " in cluster" % mac,
9770                                      errors.ECODE_NOTUNIQUE)
9771
9772       #  Build nic parameters
9773       link = nic.get(constants.INIC_LINK, None)
9774       if link == constants.VALUE_AUTO:
9775         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9776       nicparams = {}
9777       if nic_mode_req:
9778         nicparams[constants.NIC_MODE] = nic_mode
9779       if link:
9780         nicparams[constants.NIC_LINK] = link
9781
9782       check_params = cluster.SimpleFillNIC(nicparams)
9783       objects.NIC.CheckParameterSyntax(check_params)
9784       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9785
9786     # disk checks/pre-build
9787     default_vg = self.cfg.GetVGName()
9788     self.disks = []
9789     for disk in self.op.disks:
9790       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9791       if mode not in constants.DISK_ACCESS_SET:
9792         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9793                                    mode, errors.ECODE_INVAL)
9794       size = disk.get(constants.IDISK_SIZE, None)
9795       if size is None:
9796         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9797       try:
9798         size = int(size)
9799       except (TypeError, ValueError):
9800         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9801                                    errors.ECODE_INVAL)
9802
9803       data_vg = disk.get(constants.IDISK_VG, default_vg)
9804       new_disk = {
9805         constants.IDISK_SIZE: size,
9806         constants.IDISK_MODE: mode,
9807         constants.IDISK_VG: data_vg,
9808         }
9809       if constants.IDISK_METAVG in disk:
9810         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9811       if constants.IDISK_ADOPT in disk:
9812         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9813       self.disks.append(new_disk)
9814
9815     if self.op.mode == constants.INSTANCE_IMPORT:
9816       disk_images = []
9817       for idx in range(len(self.disks)):
9818         option = "disk%d_dump" % idx
9819         if export_info.has_option(constants.INISECT_INS, option):
9820           # FIXME: are the old os-es, disk sizes, etc. useful?
9821           export_name = export_info.get(constants.INISECT_INS, option)
9822           image = utils.PathJoin(self.op.src_path, export_name)
9823           disk_images.append(image)
9824         else:
9825           disk_images.append(False)
9826
9827       self.src_images = disk_images
9828
9829       if self.op.instance_name == self._old_instance_name:
9830         for idx, nic in enumerate(self.nics):
9831           if nic.mac == constants.VALUE_AUTO:
9832             nic_mac_ini = "nic%d_mac" % idx
9833             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9834
9835     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9836
9837     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9838     if self.op.ip_check:
9839       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9840         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9841                                    (self.check_ip, self.op.instance_name),
9842                                    errors.ECODE_NOTUNIQUE)
9843
9844     #### mac address generation
9845     # By generating here the mac address both the allocator and the hooks get
9846     # the real final mac address rather than the 'auto' or 'generate' value.
9847     # There is a race condition between the generation and the instance object
9848     # creation, which means that we know the mac is valid now, but we're not
9849     # sure it will be when we actually add the instance. If things go bad
9850     # adding the instance will abort because of a duplicate mac, and the
9851     # creation job will fail.
9852     for nic in self.nics:
9853       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9854         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9855
9856     #### allocator run
9857
9858     if self.op.iallocator is not None:
9859       self._RunAllocator()
9860
9861     # Release all unneeded node locks
9862     _ReleaseLocks(self, locking.LEVEL_NODE,
9863                   keep=filter(None, [self.op.pnode, self.op.snode,
9864                                      self.op.src_node]))
9865     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9866                   keep=filter(None, [self.op.pnode, self.op.snode,
9867                                      self.op.src_node]))
9868
9869     #### node related checks
9870
9871     # check primary node
9872     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9873     assert self.pnode is not None, \
9874       "Cannot retrieve locked node %s" % self.op.pnode
9875     if pnode.offline:
9876       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9877                                  pnode.name, errors.ECODE_STATE)
9878     if pnode.drained:
9879       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9880                                  pnode.name, errors.ECODE_STATE)
9881     if not pnode.vm_capable:
9882       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9883                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9884
9885     self.secondaries = []
9886
9887     # mirror node verification
9888     if self.op.disk_template in constants.DTS_INT_MIRROR:
9889       if self.op.snode == pnode.name:
9890         raise errors.OpPrereqError("The secondary node cannot be the"
9891                                    " primary node", errors.ECODE_INVAL)
9892       _CheckNodeOnline(self, self.op.snode)
9893       _CheckNodeNotDrained(self, self.op.snode)
9894       _CheckNodeVmCapable(self, self.op.snode)
9895       self.secondaries.append(self.op.snode)
9896
9897       snode = self.cfg.GetNodeInfo(self.op.snode)
9898       if pnode.group != snode.group:
9899         self.LogWarning("The primary and secondary nodes are in two"
9900                         " different node groups; the disk parameters"
9901                         " from the first disk's node group will be"
9902                         " used")
9903
9904     nodenames = [pnode.name] + self.secondaries
9905
9906     # Verify instance specs
9907     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9908     ispec = {
9909       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9910       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9911       constants.ISPEC_DISK_COUNT: len(self.disks),
9912       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9913       constants.ISPEC_NIC_COUNT: len(self.nics),
9914       constants.ISPEC_SPINDLE_USE: spindle_use,
9915       }
9916
9917     group_info = self.cfg.GetNodeGroup(pnode.group)
9918     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9919     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9920     if not self.op.ignore_ipolicy and res:
9921       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9922                                   " policy: %s") % (pnode.group,
9923                                                     utils.CommaJoin(res)),
9924                                   errors.ECODE_INVAL)
9925
9926     if not self.adopt_disks:
9927       if self.op.disk_template == constants.DT_RBD:
9928         # _CheckRADOSFreeSpace() is just a placeholder.
9929         # Any function that checks prerequisites can be placed here.
9930         # Check if there is enough space on the RADOS cluster.
9931         _CheckRADOSFreeSpace()
9932       else:
9933         # Check lv size requirements, if not adopting
9934         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9935         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9936
9937     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9938       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9939                                 disk[constants.IDISK_ADOPT])
9940                      for disk in self.disks])
9941       if len(all_lvs) != len(self.disks):
9942         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9943                                    errors.ECODE_INVAL)
9944       for lv_name in all_lvs:
9945         try:
9946           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9947           # to ReserveLV uses the same syntax
9948           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9949         except errors.ReservationError:
9950           raise errors.OpPrereqError("LV named %s used by another instance" %
9951                                      lv_name, errors.ECODE_NOTUNIQUE)
9952
9953       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9954       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9955
9956       node_lvs = self.rpc.call_lv_list([pnode.name],
9957                                        vg_names.payload.keys())[pnode.name]
9958       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9959       node_lvs = node_lvs.payload
9960
9961       delta = all_lvs.difference(node_lvs.keys())
9962       if delta:
9963         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9964                                    utils.CommaJoin(delta),
9965                                    errors.ECODE_INVAL)
9966       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9967       if online_lvs:
9968         raise errors.OpPrereqError("Online logical volumes found, cannot"
9969                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9970                                    errors.ECODE_STATE)
9971       # update the size of disk based on what is found
9972       for dsk in self.disks:
9973         dsk[constants.IDISK_SIZE] = \
9974           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9975                                         dsk[constants.IDISK_ADOPT])][0]))
9976
9977     elif self.op.disk_template == constants.DT_BLOCK:
9978       # Normalize and de-duplicate device paths
9979       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9980                        for disk in self.disks])
9981       if len(all_disks) != len(self.disks):
9982         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9983                                    errors.ECODE_INVAL)
9984       baddisks = [d for d in all_disks
9985                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9986       if baddisks:
9987         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9988                                    " cannot be adopted" %
9989                                    (", ".join(baddisks),
9990                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9991                                    errors.ECODE_INVAL)
9992
9993       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9994                                             list(all_disks))[pnode.name]
9995       node_disks.Raise("Cannot get block device information from node %s" %
9996                        pnode.name)
9997       node_disks = node_disks.payload
9998       delta = all_disks.difference(node_disks.keys())
9999       if delta:
10000         raise errors.OpPrereqError("Missing block device(s): %s" %
10001                                    utils.CommaJoin(delta),
10002                                    errors.ECODE_INVAL)
10003       for dsk in self.disks:
10004         dsk[constants.IDISK_SIZE] = \
10005           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10006
10007     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10008
10009     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10010     # check OS parameters (remotely)
10011     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10012
10013     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10014
10015     # memory check on primary node
10016     #TODO(dynmem): use MINMEM for checking
10017     if self.op.start:
10018       _CheckNodeFreeMemory(self, self.pnode.name,
10019                            "creating instance %s" % self.op.instance_name,
10020                            self.be_full[constants.BE_MAXMEM],
10021                            self.op.hypervisor)
10022
10023     self.dry_run_result = list(nodenames)
10024
10025   def Exec(self, feedback_fn):
10026     """Create and add the instance to the cluster.
10027
10028     """
10029     instance = self.op.instance_name
10030     pnode_name = self.pnode.name
10031
10032     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10033                 self.owned_locks(locking.LEVEL_NODE)), \
10034       "Node locks differ from node resource locks"
10035
10036     ht_kind = self.op.hypervisor
10037     if ht_kind in constants.HTS_REQ_PORT:
10038       network_port = self.cfg.AllocatePort()
10039     else:
10040       network_port = None
10041
10042     # This is ugly but we got a chicken-egg problem here
10043     # We can only take the group disk parameters, as the instance
10044     # has no disks yet (we are generating them right here).
10045     node = self.cfg.GetNodeInfo(pnode_name)
10046     nodegroup = self.cfg.GetNodeGroup(node.group)
10047     disks = _GenerateDiskTemplate(self,
10048                                   self.op.disk_template,
10049                                   instance, pnode_name,
10050                                   self.secondaries,
10051                                   self.disks,
10052                                   self.instance_file_storage_dir,
10053                                   self.op.file_driver,
10054                                   0,
10055                                   feedback_fn,
10056                                   self.cfg.GetGroupDiskParams(nodegroup))
10057
10058     iobj = objects.Instance(name=instance, os=self.op.os_type,
10059                             primary_node=pnode_name,
10060                             nics=self.nics, disks=disks,
10061                             disk_template=self.op.disk_template,
10062                             admin_state=constants.ADMINST_DOWN,
10063                             network_port=network_port,
10064                             beparams=self.op.beparams,
10065                             hvparams=self.op.hvparams,
10066                             hypervisor=self.op.hypervisor,
10067                             osparams=self.op.osparams,
10068                             )
10069
10070     if self.op.tags:
10071       for tag in self.op.tags:
10072         iobj.AddTag(tag)
10073
10074     if self.adopt_disks:
10075       if self.op.disk_template == constants.DT_PLAIN:
10076         # rename LVs to the newly-generated names; we need to construct
10077         # 'fake' LV disks with the old data, plus the new unique_id
10078         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10079         rename_to = []
10080         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10081           rename_to.append(t_dsk.logical_id)
10082           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10083           self.cfg.SetDiskID(t_dsk, pnode_name)
10084         result = self.rpc.call_blockdev_rename(pnode_name,
10085                                                zip(tmp_disks, rename_to))
10086         result.Raise("Failed to rename adoped LVs")
10087     else:
10088       feedback_fn("* creating instance disks...")
10089       try:
10090         _CreateDisks(self, iobj)
10091       except errors.OpExecError:
10092         self.LogWarning("Device creation failed, reverting...")
10093         try:
10094           _RemoveDisks(self, iobj)
10095         finally:
10096           self.cfg.ReleaseDRBDMinors(instance)
10097           raise
10098
10099     feedback_fn("adding instance %s to cluster config" % instance)
10100
10101     self.cfg.AddInstance(iobj, self.proc.GetECId())
10102
10103     # Declare that we don't want to remove the instance lock anymore, as we've
10104     # added the instance to the config
10105     del self.remove_locks[locking.LEVEL_INSTANCE]
10106
10107     if self.op.mode == constants.INSTANCE_IMPORT:
10108       # Release unused nodes
10109       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10110     else:
10111       # Release all nodes
10112       _ReleaseLocks(self, locking.LEVEL_NODE)
10113
10114     disk_abort = False
10115     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10116       feedback_fn("* wiping instance disks...")
10117       try:
10118         _WipeDisks(self, iobj)
10119       except errors.OpExecError, err:
10120         logging.exception("Wiping disks failed")
10121         self.LogWarning("Wiping instance disks failed (%s)", err)
10122         disk_abort = True
10123
10124     if disk_abort:
10125       # Something is already wrong with the disks, don't do anything else
10126       pass
10127     elif self.op.wait_for_sync:
10128       disk_abort = not _WaitForSync(self, iobj)
10129     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10130       # make sure the disks are not degraded (still sync-ing is ok)
10131       feedback_fn("* checking mirrors status")
10132       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10133     else:
10134       disk_abort = False
10135
10136     if disk_abort:
10137       _RemoveDisks(self, iobj)
10138       self.cfg.RemoveInstance(iobj.name)
10139       # Make sure the instance lock gets removed
10140       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10141       raise errors.OpExecError("There are some degraded disks for"
10142                                " this instance")
10143
10144     # Release all node resource locks
10145     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10146
10147     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10148       # we need to set the disks ID to the primary node, since the
10149       # preceding code might or might have not done it, depending on
10150       # disk template and other options
10151       for disk in iobj.disks:
10152         self.cfg.SetDiskID(disk, pnode_name)
10153       if self.op.mode == constants.INSTANCE_CREATE:
10154         if not self.op.no_install:
10155           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10156                         not self.op.wait_for_sync)
10157           if pause_sync:
10158             feedback_fn("* pausing disk sync to install instance OS")
10159             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10160                                                               (iobj.disks,
10161                                                                iobj), True)
10162             for idx, success in enumerate(result.payload):
10163               if not success:
10164                 logging.warn("pause-sync of instance %s for disk %d failed",
10165                              instance, idx)
10166
10167           feedback_fn("* running the instance OS create scripts...")
10168           # FIXME: pass debug option from opcode to backend
10169           os_add_result = \
10170             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10171                                           self.op.debug_level)
10172           if pause_sync:
10173             feedback_fn("* resuming disk sync")
10174             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10175                                                               (iobj.disks,
10176                                                                iobj), False)
10177             for idx, success in enumerate(result.payload):
10178               if not success:
10179                 logging.warn("resume-sync of instance %s for disk %d failed",
10180                              instance, idx)
10181
10182           os_add_result.Raise("Could not add os for instance %s"
10183                               " on node %s" % (instance, pnode_name))
10184
10185       else:
10186         if self.op.mode == constants.INSTANCE_IMPORT:
10187           feedback_fn("* running the instance OS import scripts...")
10188
10189           transfers = []
10190
10191           for idx, image in enumerate(self.src_images):
10192             if not image:
10193               continue
10194
10195             # FIXME: pass debug option from opcode to backend
10196             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10197                                                constants.IEIO_FILE, (image, ),
10198                                                constants.IEIO_SCRIPT,
10199                                                (iobj.disks[idx], idx),
10200                                                None)
10201             transfers.append(dt)
10202
10203           import_result = \
10204             masterd.instance.TransferInstanceData(self, feedback_fn,
10205                                                   self.op.src_node, pnode_name,
10206                                                   self.pnode.secondary_ip,
10207                                                   iobj, transfers)
10208           if not compat.all(import_result):
10209             self.LogWarning("Some disks for instance %s on node %s were not"
10210                             " imported successfully" % (instance, pnode_name))
10211
10212           rename_from = self._old_instance_name
10213
10214         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10215           feedback_fn("* preparing remote import...")
10216           # The source cluster will stop the instance before attempting to make
10217           # a connection. In some cases stopping an instance can take a long
10218           # time, hence the shutdown timeout is added to the connection
10219           # timeout.
10220           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10221                              self.op.source_shutdown_timeout)
10222           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10223
10224           assert iobj.primary_node == self.pnode.name
10225           disk_results = \
10226             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10227                                           self.source_x509_ca,
10228                                           self._cds, timeouts)
10229           if not compat.all(disk_results):
10230             # TODO: Should the instance still be started, even if some disks
10231             # failed to import (valid for local imports, too)?
10232             self.LogWarning("Some disks for instance %s on node %s were not"
10233                             " imported successfully" % (instance, pnode_name))
10234
10235           rename_from = self.source_instance_name
10236
10237         else:
10238           # also checked in the prereq part
10239           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10240                                        % self.op.mode)
10241
10242         # Run rename script on newly imported instance
10243         assert iobj.name == instance
10244         feedback_fn("Running rename script for %s" % instance)
10245         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10246                                                    rename_from,
10247                                                    self.op.debug_level)
10248         if result.fail_msg:
10249           self.LogWarning("Failed to run rename script for %s on node"
10250                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10251
10252     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10253
10254     if self.op.start:
10255       iobj.admin_state = constants.ADMINST_UP
10256       self.cfg.Update(iobj, feedback_fn)
10257       logging.info("Starting instance %s on node %s", instance, pnode_name)
10258       feedback_fn("* starting instance...")
10259       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10260                                             False)
10261       result.Raise("Could not start instance")
10262
10263     return list(iobj.all_nodes)
10264
10265
10266 def _CheckRADOSFreeSpace():
10267   """Compute disk size requirements inside the RADOS cluster.
10268
10269   """
10270   # For the RADOS cluster we assume there is always enough space.
10271   pass
10272
10273
class LUInstanceConsole(NoHooksLU):
  """Return the data needed to connect to an instance's console.

  This is somewhat special: instead of connecting anywhere itself, it
  returns the console description (the command line that has to be run
  on the master node) so that the caller can open the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock, with sharing as given by L{_ShareAll}.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The locked instance is looked up in the configuration and its primary
    node is verified through L{_CheckNodeOnline}.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @param feedback_fn: feedback function (not used by this LU)
    @return: the result of L{_GetInstanceConsole} for the instance
    @raise errors.OpExecError: if the primary node does not list the
      instance among its running instances

    """
    instance = self.instance
    pnode = instance.primary_node

    rpc_results = self.rpc.call_instance_list([pnode], [instance.hypervisor])
    node_insts = rpc_results[pnode]
    node_insts.Raise("Can't get node information from %s" % pnode)

    if instance.name in node_insts.payload:
      logging.debug("Connecting to console of %s on %s", instance.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

    # The instance is not running; derive the state to report from the
    # administrative state stored in the configuration
    admin_state = instance.admin_state
    if admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (instance.name, state))
10323
10324
def _GetInstanceConsole(cluster, instance):
  """Build the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object, used to fill in the hypervisor and
    backend parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is requested
  @rtype: dict
  @return: the console object returned by the hypervisor, in the form
    produced by its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameters are handed over separately instead of being
  # written into the instance, so that the defaults are never saved in
  # the instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  console_obj = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert console_obj.instance == instance.name
  assert console_obj.Validate()

  return console_obj.ToDict()
10344
10345
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The replacement itself is carried out by a L{TLReplaceDisks} tasklet;
  this logical unit checks the arguments, declares the locks and builds
  the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the combination of mode, remote node and iallocator.

    """
    opcode = self.op
    TLReplaceDisks.CheckArguments(opcode.mode, opcode.remote_node,
                                  opcode.iallocator)

  def ExpandNames(self):
    """Declare the initial locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    for lock_level in (locking.LEVEL_NODE,
                       locking.LEVEL_NODE_RES,
                       locking.LEVEL_NODEGROUP):
      assert lock_level not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The literal False is the tasklet's "delay_iallocator" argument
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the replacement mode, the new and the old
    secondary node, plus the generic per-instance variables (which take
    precedence in case of a name clash).

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The hooks run on the master node, the primary node of the instance
    and, if one was given, the new secondary node.

    @return: tuple with the same node list for both elements

    """
    instance = self.replacer.instance
    master = self.cfg.GetMasterNode()

    nl = [master, instance.primary_node]
    new_secondary = self.op.remote_node
    if new_secondary is not None:
      nl.append(new_secondary)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically acquired node group locks still match
    the instance and then runs the generic checks of L{LogicalUnit}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
10463
10464
10465 class TLReplaceDisks(Tasklet):
10466   """Replaces disks for an instance.
10467
10468   Note: Locking is not within the scope of this class.
10469
10470   """
10471   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10472                disks, delay_iallocator, early_release, ignore_ipolicy):
10473     """Initializes this class.
10474
10475     """
10476     Tasklet.__init__(self, lu)
10477
10478     # Parameters
10479     self.instance_name = instance_name
10480     self.mode = mode
10481     self.iallocator_name = iallocator_name
10482     self.remote_node = remote_node
10483     self.disks = disks
10484     self.delay_iallocator = delay_iallocator
10485     self.early_release = early_release
10486     self.ignore_ipolicy = ignore_ipolicy
10487
10488     # Runtime data
10489     self.instance = None
10490     self.new_node = None
10491     self.target_node = None
10492     self.other_node = None
10493     self.remote_node_info = None
10494     self.node_secondary_ip = None
10495
10496   @staticmethod
10497   def CheckArguments(mode, remote_node, iallocator):
10498     """Helper function for users of this class.
10499
10500     """
10501     # check for valid parameter combination
10502     if mode == constants.REPLACE_DISK_CHG:
10503       if remote_node is None and iallocator is None:
10504         raise errors.OpPrereqError("When changing the secondary either an"
10505                                    " iallocator script must be used or the"
10506                                    " new node given", errors.ECODE_INVAL)
10507
10508       if remote_node is not None and iallocator is not None:
10509         raise errors.OpPrereqError("Give either the iallocator or the new"
10510                                    " secondary, not both", errors.ECODE_INVAL)
10511
10512     elif remote_node is not None or iallocator is not None:
10513       # Not replacing the secondary
10514       raise errors.OpPrereqError("The iallocator and new node options can"
10515                                  " only be used when changing the"
10516                                  " secondary node", errors.ECODE_INVAL)
10517
10518   @staticmethod
10519   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10520     """Compute a new secondary node using an IAllocator.
10521
10522     """
10523     ial = IAllocator(lu.cfg, lu.rpc,
10524                      mode=constants.IALLOCATOR_MODE_RELOC,
10525                      name=instance_name,
10526                      relocate_from=list(relocate_from))
10527
10528     ial.Run(iallocator_name)
10529
10530     if not ial.success:
10531       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10532                                  " %s" % (iallocator_name, ial.info),
10533                                  errors.ECODE_NORES)
10534
10535     if len(ial.result) != ial.required_nodes:
10536       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10537                                  " of nodes (%s), required %s" %
10538                                  (iallocator_name,
10539                                   len(ial.result), ial.required_nodes),
10540                                  errors.ECODE_FAULT)
10541
10542     remote_node_name = ial.result[0]
10543
10544     lu.LogInfo("Selected new secondary for instance '%s': %s",
10545                instance_name, remote_node_name)
10546
10547     return remote_node_name
10548
10549   def _FindFaultyDisks(self, node_name):
10550     """Wrapper for L{_FindFaultyInstanceDisks}.
10551
10552     """
10553     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10554                                     node_name, True)
10555
10556   def _CheckDisksActivated(self, instance):
10557     """Checks if the instance disks are activated.
10558
10559     @param instance: The instance to check disks
10560     @return: True if they are activated, False otherwise
10561
10562     """
10563     nodes = instance.all_nodes
10564
10565     for idx, dev in enumerate(instance.disks):
10566       for node in nodes:
10567         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10568         self.cfg.SetDiskID(dev, node)
10569
10570         result = _BlockdevFind(self, node, dev, instance)
10571
10572         if result.offline:
10573           continue
10574         elif result.fail_msg or not result.payload:
10575           return False
10576
10577     return True
10578
10579   def CheckPrereq(self):
10580     """Check prerequisites.
10581
10582     This checks that the instance is in the cluster.
10583
10584     """
10585     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10586     assert instance is not None, \
10587       "Cannot retrieve locked instance %s" % self.instance_name
10588
10589     if instance.disk_template != constants.DT_DRBD8:
10590       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10591                                  " instances", errors.ECODE_INVAL)
10592
10593     if len(instance.secondary_nodes) != 1:
10594       raise errors.OpPrereqError("The instance has a strange layout,"
10595                                  " expected one secondary but found %d" %
10596                                  len(instance.secondary_nodes),
10597                                  errors.ECODE_FAULT)
10598
10599     if not self.delay_iallocator:
10600       self._CheckPrereq2()
10601
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    It determines the new secondary node (if any), validates the request
    against the replacement mode, fills in C{target_node}, C{other_node},
    C{new_node} and C{disks}, releases the node, node resource and node
    group locks which are not needed anymore and stores the secondary IP
    addresses of the nodes involved in C{node_secondary_ip}.

    @raise errors.OpPrereqError: if the selected node is the primary or the
      current secondary node of the instance, if disks were specified for
      a mode which doesn't accept them, or if the automatic mode can't be
      used (disks not activated or faulty on both nodes)
    @raise errors.ProgrammerError: if the replacement mode is unknown

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator; it stays None when neither was specified
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected for the automatic and the
    # change-secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: find out which node (if any) has faulty disks and
      # replace exactly those
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; with an empty disk list Exec only reports
        # that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target node) is deliberately not part of the
        # nodes checked for being online; see the offline handling below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
      # NOTE(review): the tasklet itself (not self.lu) is passed as first
      # argument here, unlike the other helper calls in this method; confirm
      # that the helper only uses attributes which the tasklet provides
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Nodes taking part in the operation; unset roles (None) are left out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
10742
10743   def Exec(self, feedback_fn):
10744     """Execute disk replacement.
10745
10746     This dispatches the disk replacement to the appropriate handler.
10747
10748     """
10749     if self.delay_iallocator:
10750       self._CheckPrereq2()
10751
10752     if __debug__:
10753       # Verify owned locks before starting operation
10754       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10755       assert set(owned_nodes) == set(self.node_secondary_ip), \
10756           ("Incorrect node locks, owning %s, expected %s" %
10757            (owned_nodes, self.node_secondary_ip.keys()))
10758       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10759               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10760
10761       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10762       assert list(owned_instances) == [self.instance_name], \
10763           "Instance '%s' not locked" % self.instance_name
10764
10765       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10766           "Should not own any node group lock at this point"
10767
10768     if not self.disks:
10769       feedback_fn("No disks need replacement")
10770       return
10771
10772     feedback_fn("Replacing disk(s) %s for %s" %
10773                 (utils.CommaJoin(self.disks), self.instance.name))
10774
10775     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10776
10777     # Activate the instance disks if we're replacing them on a down instance
10778     if activate_disks:
10779       _StartInstanceDisks(self.lu, self.instance, True)
10780
10781     try:
10782       # Should we replace the secondary node?
10783       if self.new_node is not None:
10784         fn = self._ExecDrbd8Secondary
10785       else:
10786         fn = self._ExecDrbd8DiskOnly
10787
10788       result = fn(feedback_fn)
10789     finally:
10790       # Deactivate the instance disks if we're replacing them on a
10791       # down instance
10792       if activate_disks:
10793         _SafeShutdownInstanceDisks(self.lu, self.instance)
10794
10795     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10796
10797     if __debug__:
10798       # Verify owned locks
10799       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10800       nodes = frozenset(self.node_secondary_ip)
10801       assert ((self.early_release and not owned_nodes) or
10802               (not self.early_release and not (set(owned_nodes) - nodes))), \
10803         ("Not owning the correct locks, early_release=%s, owned=%r,"
10804          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10805
10806     return result
10807
10808   def _CheckVolumeGroup(self, nodes):
10809     self.lu.LogInfo("Checking volume groups")
10810
10811     vgname = self.cfg.GetVGName()
10812
10813     # Make sure volume group exists on all involved nodes
10814     results = self.rpc.call_vg_list(nodes)
10815     if not results:
10816       raise errors.OpExecError("Can't list volume groups on the nodes")
10817
10818     for node in nodes:
10819       res = results[node]
10820       res.Raise("Error checking node %s" % node)
10821       if vgname not in res.payload:
10822         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10823                                  (vgname, node))
10824
10825   def _CheckDisksExistence(self, nodes):
10826     # Check disk existence
10827     for idx, dev in enumerate(self.instance.disks):
10828       if idx not in self.disks:
10829         continue
10830
10831       for node in nodes:
10832         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10833         self.cfg.SetDiskID(dev, node)
10834
10835         result = _BlockdevFind(self, node, dev, self.instance)
10836
10837         msg = result.fail_msg
10838         if msg or not result.payload:
10839           if not msg:
10840             msg = "disk not found"
10841           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10842                                    (idx, node, msg))
10843
10844   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10845     for idx, dev in enumerate(self.instance.disks):
10846       if idx not in self.disks:
10847         continue
10848
10849       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10850                       (idx, node_name))
10851
10852       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10853                                    on_primary, ldisk=ldisk):
10854         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10855                                  " replace disks for instance %s" %
10856                                  (node_name, self.instance.name))
10857
10858   def _CreateNewStorage(self, node_name):
10859     """Create new storage on the primary or secondary node.
10860
10861     This is only used for same-node replaces, not for changing the
10862     secondary node, hence we don't want to modify the existing disk.
10863
10864     """
10865     iv_names = {}
10866
10867     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10868     for idx, dev in enumerate(disks):
10869       if idx not in self.disks:
10870         continue
10871
10872       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10873
10874       self.cfg.SetDiskID(dev, node_name)
10875
10876       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10877       names = _GenerateUniqueNames(self.lu, lv_names)
10878
10879       (data_disk, meta_disk) = dev.children
10880       vg_data = data_disk.logical_id[0]
10881       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10882                              logical_id=(vg_data, names[0]),
10883                              params=data_disk.params)
10884       vg_meta = meta_disk.logical_id[0]
10885       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10886                              logical_id=(vg_meta, names[1]),
10887                              params=meta_disk.params)
10888
10889       new_lvs = [lv_data, lv_meta]
10890       old_lvs = [child.Copy() for child in dev.children]
10891       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10892
10893       # we pass force_create=True to force the LVM creation
10894       for new_lv in new_lvs:
10895         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10896                              _GetInstanceInfoText(self.instance), False)
10897
10898     return iv_names
10899
10900   def _CheckDevices(self, node_name, iv_names):
10901     for name, (dev, _, _) in iv_names.iteritems():
10902       self.cfg.SetDiskID(dev, node_name)
10903
10904       result = _BlockdevFind(self, node_name, dev, self.instance)
10905
10906       msg = result.fail_msg
10907       if msg or not result.payload:
10908         if not msg:
10909           msg = "disk not found"
10910         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10911                                  (name, msg))
10912
10913       if result.payload.is_degraded:
10914         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10915
10916   def _RemoveOldStorage(self, node_name, iv_names):
10917     for name, (_, old_lvs, _) in iv_names.iteritems():
10918       self.lu.LogInfo("Remove logical volumes for %s" % name)
10919
10920       for lv in old_lvs:
10921         self.cfg.SetDiskID(lv, node_name)
10922
10923         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10924         if msg:
10925           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10926                              hint="remove unused LVs manually")
10927
10928   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10929     """Replace a disk on the primary or secondary for DRBD 8.
10930
10931     The algorithm for replace is quite complicated:
10932
10933       1. for each disk to be replaced:
10934
10935         1. create new LVs on the target node with unique names
10936         1. detach old LVs from the drbd device
10937         1. rename old LVs to name_replaced.<time_t>
10938         1. rename new LVs to old LVs
10939         1. attach the new LVs (with the old names now) to the drbd device
10940
10941       1. wait for sync across all devices
10942
10943       1. for each modified disk:
10944
10945         1. remove old LVs (which have the name name_replaces.<time_t>)
10946
10947     Failures are not very well handled.
10948
10949     """
10950     steps_total = 6
10951
10952     # Step: check device activation
10953     self.lu.LogStep(1, steps_total, "Check device existence")
10954     self._CheckDisksExistence([self.other_node, self.target_node])
10955     self._CheckVolumeGroup([self.target_node, self.other_node])
10956
10957     # Step: check other node consistency
10958     self.lu.LogStep(2, steps_total, "Check peer consistency")
10959     self._CheckDisksConsistency(self.other_node,
10960                                 self.other_node == self.instance.primary_node,
10961                                 False)
10962
10963     # Step: create new storage
10964     self.lu.LogStep(3, steps_total, "Allocate new storage")
10965     iv_names = self._CreateNewStorage(self.target_node)
10966
10967     # Step: for each lv, detach+rename*2+attach
10968     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10969     for dev, old_lvs, new_lvs in iv_names.itervalues():
10970       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10971
10972       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10973                                                      old_lvs)
10974       result.Raise("Can't detach drbd from local storage on node"
10975                    " %s for device %s" % (self.target_node, dev.iv_name))
10976       #dev.children = []
10977       #cfg.Update(instance)
10978
10979       # ok, we created the new LVs, so now we know we have the needed
10980       # storage; as such, we proceed on the target node to rename
10981       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10982       # using the assumption that logical_id == physical_id (which in
10983       # turn is the unique_id on that node)
10984
10985       # FIXME(iustin): use a better name for the replaced LVs
10986       temp_suffix = int(time.time())
10987       ren_fn = lambda d, suff: (d.physical_id[0],
10988                                 d.physical_id[1] + "_replaced-%s" % suff)
10989
10990       # Build the rename list based on what LVs exist on the node
10991       rename_old_to_new = []
10992       for to_ren in old_lvs:
10993         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10994         if not result.fail_msg and result.payload:
10995           # device exists
10996           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10997
10998       self.lu.LogInfo("Renaming the old LVs on the target node")
10999       result = self.rpc.call_blockdev_rename(self.target_node,
11000                                              rename_old_to_new)
11001       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11002
11003       # Now we rename the new LVs to the old LVs
11004       self.lu.LogInfo("Renaming the new LVs on the target node")
11005       rename_new_to_old = [(new, old.physical_id)
11006                            for old, new in zip(old_lvs, new_lvs)]
11007       result = self.rpc.call_blockdev_rename(self.target_node,
11008                                              rename_new_to_old)
11009       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11010
11011       # Intermediate steps of in memory modifications
11012       for old, new in zip(old_lvs, new_lvs):
11013         new.logical_id = old.logical_id
11014         self.cfg.SetDiskID(new, self.target_node)
11015
11016       # We need to modify old_lvs so that removal later removes the
11017       # right LVs, not the newly added ones; note that old_lvs is a
11018       # copy here
11019       for disk in old_lvs:
11020         disk.logical_id = ren_fn(disk, temp_suffix)
11021         self.cfg.SetDiskID(disk, self.target_node)
11022
11023       # Now that the new lvs have the old name, we can add them to the device
11024       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11025       result = self.rpc.call_blockdev_addchildren(self.target_node,
11026                                                   (dev, self.instance), new_lvs)
11027       msg = result.fail_msg
11028       if msg:
11029         for new_lv in new_lvs:
11030           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11031                                                new_lv).fail_msg
11032           if msg2:
11033             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11034                                hint=("cleanup manually the unused logical"
11035                                      "volumes"))
11036         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11037
11038     cstep = itertools.count(5)
11039
11040     if self.early_release:
11041       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11042       self._RemoveOldStorage(self.target_node, iv_names)
11043       # TODO: Check if releasing locks early still makes sense
11044       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11045     else:
11046       # Release all resource locks except those used by the instance
11047       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11048                     keep=self.node_secondary_ip.keys())
11049
11050     # Release all node locks while waiting for sync
11051     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11052
11053     # TODO: Can the instance lock be downgraded here? Take the optional disk
11054     # shutdown in the caller into consideration.
11055
11056     # Wait for sync
11057     # This can fail as the old devices are degraded and _WaitForSync
11058     # does a combined result over all disks, so we don't check its return value
11059     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11060     _WaitForSync(self.lu, self.instance)
11061
11062     # Check all devices manually
11063     self._CheckDevices(self.instance.primary_node, iv_names)
11064
11065     # Step: remove old storage
11066     if not self.early_release:
11067       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11068       self._RemoveOldStorage(self.target_node, iv_names)
11069
11070   def _ExecDrbd8Secondary(self, feedback_fn):
11071     """Replace the secondary node for DRBD 8.
11072
11073     The algorithm for replace is quite complicated:
11074       - for all disks of the instance:
11075         - create new LVs on the new node with same names
11076         - shutdown the drbd device on the old secondary
11077         - disconnect the drbd network on the primary
11078         - create the drbd device on the new secondary
11079         - network attach the drbd on the primary, using an artifice:
11080           the drbd code for Attach() will connect to the network if it
11081           finds a device which is connected to the good local disks but
11082           not network enabled
11083       - wait for sync across all devices
11084       - remove all disks from the old secondary
11085
11086     Failures are not very well handled.
11087
11088     """
11089     steps_total = 6
11090
11091     pnode = self.instance.primary_node
11092
11093     # Step: check device activation
11094     self.lu.LogStep(1, steps_total, "Check device existence")
11095     self._CheckDisksExistence([self.instance.primary_node])
11096     self._CheckVolumeGroup([self.instance.primary_node])
11097
11098     # Step: check other node consistency
11099     self.lu.LogStep(2, steps_total, "Check peer consistency")
11100     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11101
11102     # Step: create new storage
11103     self.lu.LogStep(3, steps_total, "Allocate new storage")
11104     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11105     for idx, dev in enumerate(disks):
11106       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11107                       (self.new_node, idx))
11108       # we pass force_create=True to force LVM creation
11109       for new_lv in dev.children:
11110         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11111                              True, _GetInstanceInfoText(self.instance), False)
11112
11113     # Step 4: dbrd minors and drbd setups changes
11114     # after this, we must manually remove the drbd minors on both the
11115     # error and the success paths
11116     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11117     minors = self.cfg.AllocateDRBDMinor([self.new_node
11118                                          for dev in self.instance.disks],
11119                                         self.instance.name)
11120     logging.debug("Allocated minors %r", minors)
11121
11122     iv_names = {}
11123     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11124       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11125                       (self.new_node, idx))
11126       # create new devices on new_node; note that we create two IDs:
11127       # one without port, so the drbd will be activated without
11128       # networking information on the new node at this stage, and one
11129       # with network, for the latter activation in step 4
11130       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11131       if self.instance.primary_node == o_node1:
11132         p_minor = o_minor1
11133       else:
11134         assert self.instance.primary_node == o_node2, "Three-node instance?"
11135         p_minor = o_minor2
11136
11137       new_alone_id = (self.instance.primary_node, self.new_node, None,
11138                       p_minor, new_minor, o_secret)
11139       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11140                     p_minor, new_minor, o_secret)
11141
11142       iv_names[idx] = (dev, dev.children, new_net_id)
11143       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11144                     new_net_id)
11145       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11146                               logical_id=new_alone_id,
11147                               children=dev.children,
11148                               size=dev.size,
11149                               params={})
11150       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11151                                              self.cfg)
11152       try:
11153         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11154                               anno_new_drbd,
11155                               _GetInstanceInfoText(self.instance), False)
11156       except errors.GenericError:
11157         self.cfg.ReleaseDRBDMinors(self.instance.name)
11158         raise
11159
11160     # We have new devices, shutdown the drbd on the old secondary
11161     for idx, dev in enumerate(self.instance.disks):
11162       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11163       self.cfg.SetDiskID(dev, self.target_node)
11164       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11165                                             (dev, self.instance)).fail_msg
11166       if msg:
11167         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11168                            "node: %s" % (idx, msg),
11169                            hint=("Please cleanup this device manually as"
11170                                  " soon as possible"))
11171
11172     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11173     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11174                                                self.instance.disks)[pnode]
11175
11176     msg = result.fail_msg
11177     if msg:
11178       # detaches didn't succeed (unlikely)
11179       self.cfg.ReleaseDRBDMinors(self.instance.name)
11180       raise errors.OpExecError("Can't detach the disks from the network on"
11181                                " old node: %s" % (msg,))
11182
11183     # if we managed to detach at least one, we update all the disks of
11184     # the instance to point to the new secondary
11185     self.lu.LogInfo("Updating instance configuration")
11186     for dev, _, new_logical_id in iv_names.itervalues():
11187       dev.logical_id = new_logical_id
11188       self.cfg.SetDiskID(dev, self.instance.primary_node)
11189
11190     self.cfg.Update(self.instance, feedback_fn)
11191
11192     # Release all node locks (the configuration has been updated)
11193     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11194
11195     # and now perform the drbd attach
11196     self.lu.LogInfo("Attaching primary drbds to new secondary"
11197                     " (standalone => connected)")
11198     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11199                                             self.new_node],
11200                                            self.node_secondary_ip,
11201                                            (self.instance.disks, self.instance),
11202                                            self.instance.name,
11203                                            False)
11204     for to_node, to_result in result.items():
11205       msg = to_result.fail_msg
11206       if msg:
11207         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11208                            to_node, msg,
11209                            hint=("please do a gnt-instance info to see the"
11210                                  " status of disks"))
11211
11212     cstep = itertools.count(5)
11213
11214     if self.early_release:
11215       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11216       self._RemoveOldStorage(self.target_node, iv_names)
11217       # TODO: Check if releasing locks early still makes sense
11218       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11219     else:
11220       # Release all resource locks except those used by the instance
11221       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11222                     keep=self.node_secondary_ip.keys())
11223
11224     # TODO: Can the instance lock be downgraded here? Take the optional disk
11225     # shutdown in the caller into consideration.
11226
11227     # Wait for sync
11228     # This can fail as the old devices are degraded and _WaitForSync
11229     # does a combined result over all disks, so we don't check its return value
11230     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11231     _WaitForSync(self.lu, self.instance)
11232
11233     # Check all devices manually
11234     self._CheckDevices(self.instance.primary_node, iv_names)
11235
11236     # Step: remove old storage
11237     if not self.early_release:
11238       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11239       self._RemoveOldStorage(self.target_node, iv_names)
11240
11241
11242 class LURepairNodeStorage(NoHooksLU):
11243   """Repairs the volume group on a node.
11244
11245   """
11246   REQ_BGL = False
11247
11248   def CheckArguments(self):
11249     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11250
11251     storage_type = self.op.storage_type
11252
11253     if (constants.SO_FIX_CONSISTENCY not in
11254         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11255       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11256                                  " repaired" % storage_type,
11257                                  errors.ECODE_INVAL)
11258
11259   def ExpandNames(self):
11260     self.needed_locks = {
11261       locking.LEVEL_NODE: [self.op.node_name],
11262       }
11263
11264   def _CheckFaultyDisks(self, instance, node_name):
11265     """Ensure faulty disks abort the opcode or at least warn."""
11266     try:
11267       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11268                                   node_name, True):
11269         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11270                                    " node '%s'" % (instance.name, node_name),
11271                                    errors.ECODE_STATE)
11272     except errors.OpPrereqError, err:
11273       if self.op.ignore_consistency:
11274         self.proc.LogWarning(str(err.args[0]))
11275       else:
11276         raise
11277
11278   def CheckPrereq(self):
11279     """Check prerequisites.
11280
11281     """
11282     # Check whether any instance on this node has faulty disks
11283     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11284       if inst.admin_state != constants.ADMINST_UP:
11285         continue
11286       check_nodes = set(inst.all_nodes)
11287       check_nodes.discard(self.op.node_name)
11288       for inst_node_name in check_nodes:
11289         self._CheckFaultyDisks(inst, inst_node_name)
11290
11291   def Exec(self, feedback_fn):
11292     feedback_fn("Repairing storage unit '%s' on %s ..." %
11293                 (self.op.name, self.op.node_name))
11294
11295     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11296     result = self.rpc.call_storage_execute(self.op.node_name,
11297                                            self.op.storage_type, st_args,
11298                                            self.op.name,
11299                                            constants.SO_FIX_CONSISTENCY)
11300     result.Raise("Failed to repair storage unit '%s' on %s" %
11301                  (self.op.name, self.op.node_name))
11302
11303
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a node.

  The new location is either computed by an iallocator or, for secondary
  instances only, given explicitly as a remote node.

  """
  REQ_BGL = False

  # Translates the opcode's evacuation mode into the iallocator's mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Check the C{iallocator}/C{remote_node} opcode parameters.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and declare the (shared) locks to acquire.

    @raise errors.OpPrereqError: if the remote node is the evacuated node
        itself, or a remote node is given for a mode other than
        L{constants.NODE_EVAC_SEC}

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set with the evacuated node plus either the given remote node
        or, without one, the members of the evacuated node's group(s)

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: result of the instance lookup function matching the mode
    @raise errors.OpPrereqError: for L{constants.NODE_EVAC_ALL}, which is
        currently always rejected

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still adequate.

    Also computes C{self.instances} and C{self.instance_names}.

    @raise errors.OpPrereqError: if the needed nodes are no longer covered
        by the owned node locks, or the remote node is the primary node
        of an instance to be evacuated
    @raise errors.OpExecError: if the node groups or the instances on the
        node changed since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Build the jobs which perform the actual evacuation.

    @param feedback_fn: not used by this method
    @return: L{ResultWithJobs} wrapping one job (a list of opcodes) per
        unit of work; empty if there are no instances to evacuate
    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance, changing its secondary node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11501
11502
11503 def _SetOpEarlyRelease(early_release, op):
11504   """Sets C{early_release} flag on opcodes if available.
11505
11506   """
11507   try:
11508     op.early_release = early_release
11509   except AttributeError:
11510     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11511
11512   return op
11513
11514
11515 def _NodeEvacDest(use_nodes, group, nodes):
11516   """Returns group or nodes depending on caller's choice.
11517
11518   """
11519   if use_nodes:
11520     return utils.CommaJoin(nodes)
11521   else:
11522     return group
11523
11524
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Converts a change-group or node-evacuate iallocator result into jobs.

  Handles the results of the iallocator modes
  L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance, used for logging
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator, unpacked as
      C{(moved, failed, jobs)}
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups
  @return: one list of loaded opcodes per entry of C{jobs}
  @raise errors.OpExecError: if the result lists failed instances

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failed_descs = []
    for (name, reason) in failed:
      failed_descs.append("%s (%s)" % (name, reason))
    failreason = utils.CommaJoin(failed_descs)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    moved_descs = []
    for (name, group, nodes) in moved:
      destination = _NodeEvacDest(use_nodes, group, nodes)
      moved_descs.append("%s (to %s)" % (name, destination))
    lu.LogInfo("Instances to be moved: %s", utils.CommaJoin(moved_descs))

  result = []
  for ops in jobs:
    # Load all opcodes of a job first, then flag each of them
    loaded = [opcodes.OpCode.LoadOpCode(op) for op in ops]
    result.append([_SetOpEarlyRelease(early_release, op) for op in loaded])

  return result
11558
11559
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit enlarging a single disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and defers the node-level locks.

    Both node lock levels start out empty and are flagged for recalculation
    in replace mode; L{DeclareLocks} fills them in.

    """
    self._ExpandAndLockInstance()

    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[level] = []
      self.recalculate_locks[level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the node locks once the instance lock is known.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      return

    if level == locking.LEVEL_NODE_RES:
      # The resource level uses a copy of the node level's lock list
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    Hooks run on the master, the primary and all the secondaries.

    """
    env = {}
    env["DISK"] = self.op.disk
    env["AMOUNT"] = self.op.amount
    env["ABSOLUTE"] = self.op.absolute

    # Values from the instance environment take precedence
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Builds the list of nodes running the hooks.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Checks the prerequisites of the operation.

    The instance must be known to the configuration, all its nodes must pass
    L{_CheckNodeOnline}, the disk template must be growable and the
    requested size must not shrink the disk. For all templates except file,
    shared file and RBD the free disk space is checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    nodenames = list(instance.all_nodes)
    for nodename in nodenames:
      _CheckNodeOnline(self, nodename)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    absolute = self.op.absolute

    # Derive both the size difference and the final size from the request
    if absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta

    if self.delta < 0:
      if absolute:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)

      raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                 utils.FormatUnit(self.delta, "h"),
                                 errors.ECODE_INVAL)

    templates_without_check = (constants.DT_FILE,
                               constants.DT_SHARED_FILE,
                               constants.DT_RBD)

    if instance.disk_template not in templates_without_check:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      growth = self.disk.ComputeGrowth(self.delta)
      _CheckNodesFreeDiskPerVG(self, nodenames, growth)

  def Exec(self, feedback_fn):
    """Grows the disk.

    The grow request is first sent to all nodes in dry-run mode and only
    afterwards executed for real; the new size is then recorded in the
    configuration.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    (activated, _) = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # The first pass is a dry-run on all nodes; once we know that (as far as
    # we can test) the operation will succeed everywhere, the second pass
    # runs it for real
    for dry_run in (True, False):
      for node in instance.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_grow(node, (disk, instance),
                                             self.delta, dry_run)
        result.Raise("Grow request failed to node %s" % node)

        if not dry_run:
          # TODO: Rewrite code to work properly
          # DRBD goes into sync mode for a short amount of time after
          # executing the "resize" command. DRBD 8.x below version 8.0.13
          # contains a bug whereby calling "resize" in sync mode fails.
          # Sleeping for a short amount of time is a work-around.
          time.sleep(5)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if not self.op.wait_for_sync:
      if instance.admin_state != constants.ADMINST_UP:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested")
    else:
      if not _WaitForSync(self, instance, disks=[disk]):
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")

      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11715
11716
class LUInstanceQueryData(NoHooksLU):
  """Logical unit collecting configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determines the wanted instances and the locks to acquire.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.use_locking and not self.op.instances:
      # Names will be taken from the acquired locks
      self.wanted_names = None
    else:
      # Names can be expanded right away
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares node group and node locks if locking is in use.

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      locked_instances = self.owned_locks(locking.LEVEL_INSTANCE)

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        frozenset(uuid
                  for name in locked_instances
                  for uuid in self.cfg.GetInstanceNodeGroups(name))

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Checks the prerequisites.

    Loads the wanted instances from the configuration and, when locking is
    in use, verifies them via L{_CheckInstancesNodeGroups}.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      assert not (owned_instances or owned_groups or owned_nodes)
    else:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Queries a node for the status of a block device.

    @return: C{None} for static queries, if no node was given, if the RPC
      result is marked offline or if the node reported no status; otherwise
      a tuple of the device path, major, minor, sync percentage, estimated
      time, degraded flag and local disk status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance.name)

    status = find_result.payload
    if status is None:
      return None

    fields = ("dev_path", "major", "minor", "sync_percent", "estimated_time",
              "is_degraded", "ldisk_status")

    return tuple(getattr(status, name) for name in fields)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Annotates a disk with its parameters and computes its status.

    """
    (annotated, ) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, annotated)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Computes the status of a block device and, recursively, its children.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # For DRBD the secondary is whichever of the first two logical_id
      # entries is not the primary node; the value passed in is ignored
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance, dev)

    children_status = []
    if dev.children:
      for child in dev.children:
        children_status.append(self._ComputeDiskStatusInner(instance, snode,
                                                            child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gathers and returns the data, keyed by instance name.

    """
    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(inst.all_nodes
                                   for inst in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    result = {}

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      # Stays None for static queries and for offline primary nodes
      remote_state = None

      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
      elif not self.op.static:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        remote_info = info_result.payload

        if remote_info and "state" in remote_info:
          remote_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          remote_state = "down"
        else:
          remote_state = instance.admin_state

      disks = [self._ComputeDiskStatus(instance, None, disk)
               for disk in instance.disks]

      snodes_group_uuids = []
      for snode_name in instance.secondary_nodes:
        snodes_group_uuids.append(nodes[snode_name].group)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": groups[pnode.group].name,
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": [groups[group_uuid].name
                               for group_uuid in snodes_group_uuids],
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
11930
11931
def PrepareContainerMods(mods, private_fn):
  """Extends every container modification with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; called once per modification without arguments. If
    C{None}, the private field is set to C{None}
  @rtype: list
  @return: List of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
11949
11950
#: Type description for the changes returned by the callbacks passed to
#: L{ApplyContainerMods}; the check is built from a length-2 test and a
#: per-item test (non-empty string followed by any value) and is also applied
#: when a callback did not report changes at all
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2),
                          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11958
11959
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies the modifications described by C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify; changed in place
  @type chgdesc: None or list
  @param chgdesc: List of applied changes; if not C{None}, it is extended
    with the changes of every modification
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves are used as the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: For negative indices other than -1, for indices beyond
    the end of the container and for non-existing items
  @raise errors.ProgrammerError: For unknown operations

  """
  for (op, idx, params, private) in mods:
    use_last = (idx == -1)

    if use_last:
      # -1 stands for the last item (or the end of the list when adding)
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")

      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))

      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Work out the index the new item is going to have
      if use_last:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is not None:
        (item, changes) = create_fn(addidx, params, private)
      else:
        item = params

      if use_last:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert places the item in front of the specified index
        container.insert(idx, item)

    else:
      # Everything else operates on an item which must exist already
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)

      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]

      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12047
12048
def _UpdateIvNames(base_index, disks):
  """Renumbers the C{iv_name} attribute of disks.

  The first disk is named C{disk/<base_index>}, every following disk gets
  the next higher number.

  @param base_index: Number used for the first disk
  @type disks: list of L{objects.Disk}
  @param disks: Disks to update in place

  """
  offset = 0

  for disk in disks:
    disk.iv_name = "disk/%s" % (base_index + offset, )
    offset += 1
12057
12058
class _InstNicModPrivate:
  """Private data attached to a network interface modification.

  Used by L{LUInstanceSetParams}; both attributes start out as C{None}.

  """
  def __init__(self):
    # Filled in while the modification is being prepared
    self.params = self.filled = None
12068
12069
12070 class LUInstanceSetParams(LogicalUnit):
12071   """Modifies an instances's parameters.
12072
12073   """
12074   HPATH = "instance-modify"
12075   HTYPE = constants.HTYPE_INSTANCE
12076   REQ_BGL = False
12077
12078   @staticmethod
12079   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12080     assert ht.TList(mods)
12081     assert not mods or len(mods[0]) in (2, 3)
12082
12083     if mods and len(mods[0]) == 2:
12084       result = []
12085
12086       addremove = 0
12087       for op, params in mods:
12088         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12089           result.append((op, -1, params))
12090           addremove += 1
12091
12092           if addremove > 1:
12093             raise errors.OpPrereqError("Only one %s add or remove operation is"
12094                                        " supported at a time" % kind,
12095                                        errors.ECODE_INVAL)
12096         else:
12097           result.append((constants.DDM_MODIFY, op, params))
12098
12099       assert verify_fn(result)
12100     else:
12101       result = mods
12102
12103     return result
12104
12105   @staticmethod
12106   def _CheckMods(kind, mods, key_types, item_fn):
12107     """Ensures requested disk/NIC modifications are valid.
12108
12109     """
12110     for (op, _, params) in mods:
12111       assert ht.TDict(params)
12112
12113       utils.ForceDictType(params, key_types)
12114
12115       if op == constants.DDM_REMOVE:
12116         if params:
12117           raise errors.OpPrereqError("No settings should be passed when"
12118                                      " removing a %s" % kind,
12119                                      errors.ECODE_INVAL)
12120       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12121         item_fn(op, params)
12122       else:
12123         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12124
12125   @staticmethod
12126   def _VerifyDiskModification(op, params):
12127     """Verifies a disk modification.
12128
12129     """
12130     if op == constants.DDM_ADD:
12131       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12132       if mode not in constants.DISK_ACCESS_SET:
12133         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12134                                    errors.ECODE_INVAL)
12135
12136       size = params.get(constants.IDISK_SIZE, None)
12137       if size is None:
12138         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12139                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12140
12141       try:
12142         size = int(size)
12143       except (TypeError, ValueError), err:
12144         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12145                                    errors.ECODE_INVAL)
12146
12147       params[constants.IDISK_SIZE] = size
12148
12149     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12150       raise errors.OpPrereqError("Disk size change not possible, use"
12151                                  " grow-disk", errors.ECODE_INVAL)
12152
12153   @staticmethod
12154   def _VerifyNicModification(op, params):
12155     """Verifies a network interface modification.
12156
12157     """
12158     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12159       ip = params.get(constants.INIC_IP, None)
12160       if ip is None:
12161         pass
12162       elif ip.lower() == constants.VALUE_NONE:
12163         params[constants.INIC_IP] = None
12164       elif not netutils.IPAddress.IsValid(ip):
12165         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12166                                    errors.ECODE_INVAL)
12167
12168       bridge = params.get("bridge", None)
12169       link = params.get(constants.INIC_LINK, None)
12170       if bridge and link:
12171         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12172                                    " at the same time", errors.ECODE_INVAL)
12173       elif bridge and bridge.lower() == constants.VALUE_NONE:
12174         params["bridge"] = None
12175       elif link and link.lower() == constants.VALUE_NONE:
12176         params[constants.INIC_LINK] = None
12177
12178       if op == constants.DDM_ADD:
12179         macaddr = params.get(constants.INIC_MAC, None)
12180         if macaddr is None:
12181           params[constants.INIC_MAC] = constants.VALUE_AUTO
12182
12183       if constants.INIC_MAC in params:
12184         macaddr = params[constants.INIC_MAC]
12185         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12186           macaddr = utils.NormalizeAndValidateMac(macaddr)
12187
12188         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12189           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12190                                      " modifying an existing NIC",
12191                                      errors.ECODE_INVAL)
12192
12193   def CheckArguments(self):
12194     if not (self.op.nics or self.op.disks or self.op.disk_template or
12195             self.op.hvparams or self.op.beparams or self.op.os_name or
12196             self.op.offline is not None or self.op.runtime_mem):
12197       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12198
12199     if self.op.hvparams:
12200       _CheckGlobalHvParams(self.op.hvparams)
12201
12202     self.op.disks = \
12203       self._UpgradeDiskNicMods("disk", self.op.disks,
12204         opcodes.OpInstanceSetParams.TestDiskModifications)
12205     self.op.nics = \
12206       self._UpgradeDiskNicMods("NIC", self.op.nics,
12207         opcodes.OpInstanceSetParams.TestNicModifications)
12208
12209     # Check disk modifications
12210     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12211                     self._VerifyDiskModification)
12212
12213     if self.op.disks and self.op.disk_template is not None:
12214       raise errors.OpPrereqError("Disk template conversion and other disk"
12215                                  " changes not supported at the same time",
12216                                  errors.ECODE_INVAL)
12217
12218     if (self.op.disk_template and
12219         self.op.disk_template in constants.DTS_INT_MIRROR and
12220         self.op.remote_node is None):
12221       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12222                                  " one requires specifying a secondary node",
12223                                  errors.ECODE_INVAL)
12224
12225     # Check NIC modifications
12226     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12227                     self._VerifyNicModification)
12228
12229   def ExpandNames(self):
12230     self._ExpandAndLockInstance()
12231     # Can't even acquire node locks in shared mode as upcoming changes in
12232     # Ganeti 2.6 will start to modify the node object on disk conversion
12233     self.needed_locks[locking.LEVEL_NODE] = []
12234     self.needed_locks[locking.LEVEL_NODE_RES] = []
12235     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12236
12237   def DeclareLocks(self, level):
12238     # TODO: Acquire group lock in shared mode (disk parameters)
12239     if level == locking.LEVEL_NODE:
12240       self._LockInstancesNodes()
12241       if self.op.disk_template and self.op.remote_node:
12242         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12243         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12244     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12245       # Copy node locks
12246       self.needed_locks[locking.LEVEL_NODE_RES] = \
12247         self.needed_locks[locking.LEVEL_NODE][:]
12248
12249   def BuildHooksEnv(self):
12250     """Build hooks env.
12251
12252     This runs on the master, primary and secondaries.
12253
12254     """
12255     args = dict()
12256     if constants.BE_MINMEM in self.be_new:
12257       args["minmem"] = self.be_new[constants.BE_MINMEM]
12258     if constants.BE_MAXMEM in self.be_new:
12259       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12260     if constants.BE_VCPUS in self.be_new:
12261       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12262     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12263     # information at all.
12264
12265     if self._new_nics is not None:
12266       nics = []
12267
12268       for nic in self._new_nics:
12269         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12270         mode = nicparams[constants.NIC_MODE]
12271         link = nicparams[constants.NIC_LINK]
12272         nics.append((nic.ip, nic.mac, mode, link))
12273
12274       args["nics"] = nics
12275
12276     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12277     if self.op.disk_template:
12278       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12279     if self.op.runtime_mem:
12280       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12281
12282     return env
12283
12284   def BuildHooksNodes(self):
12285     """Build hooks nodes.
12286
12287     """
12288     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12289     return (nl, nl)
12290
12291   def _PrepareNicModification(self, params, private, old_ip, old_params,
12292                               cluster, pnode):
12293     update_params_dict = dict([(key, params[key])
12294                                for key in constants.NICS_PARAMETERS
12295                                if key in params])
12296
12297     if "bridge" in params:
12298       update_params_dict[constants.NIC_LINK] = params["bridge"]
12299
12300     new_params = _GetUpdatedParams(old_params, update_params_dict)
12301     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12302
12303     new_filled_params = cluster.SimpleFillNIC(new_params)
12304     objects.NIC.CheckParameterSyntax(new_filled_params)
12305
12306     new_mode = new_filled_params[constants.NIC_MODE]
12307     if new_mode == constants.NIC_MODE_BRIDGED:
12308       bridge = new_filled_params[constants.NIC_LINK]
12309       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12310       if msg:
12311         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12312         if self.op.force:
12313           self.warn.append(msg)
12314         else:
12315           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12316
12317     elif new_mode == constants.NIC_MODE_ROUTED:
12318       ip = params.get(constants.INIC_IP, old_ip)
12319       if ip is None:
12320         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12321                                    " on a routed NIC", errors.ECODE_INVAL)
12322
12323     if constants.INIC_MAC in params:
12324       mac = params[constants.INIC_MAC]
12325       if mac is None:
12326         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12327                                    errors.ECODE_INVAL)
12328       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12329         # otherwise generate the MAC address
12330         params[constants.INIC_MAC] = \
12331           self.cfg.GenerateMAC(self.proc.GetECId())
12332       else:
12333         # or validate/reserve the current one
12334         try:
12335           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12336         except errors.ReservationError:
12337           raise errors.OpPrereqError("MAC address '%s' already in use"
12338                                      " in cluster" % mac,
12339                                      errors.ECODE_NOTUNIQUE)
12340
12341     private.params = new_params
12342     private.filled = new_filled_params
12343
12344   def CheckPrereq(self):
12345     """Check prerequisites.
12346
12347     This only checks the instance list against the existing names.
12348
12349     """
12350     # checking the new params on the primary/secondary nodes
12351
12352     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12353     cluster = self.cluster = self.cfg.GetClusterInfo()
12354     assert self.instance is not None, \
12355       "Cannot retrieve locked instance %s" % self.op.instance_name
12356     pnode = instance.primary_node
12357     nodelist = list(instance.all_nodes)
12358     pnode_info = self.cfg.GetNodeInfo(pnode)
12359     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12360
12361     # Prepare disk/NIC modifications
12362     self.diskmod = PrepareContainerMods(self.op.disks, None)
12363     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12364
12365     # OS change
12366     if self.op.os_name and not self.op.force:
12367       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12368                       self.op.force_variant)
12369       instance_os = self.op.os_name
12370     else:
12371       instance_os = instance.os
12372
12373     assert not (self.op.disk_template and self.op.disks), \
12374       "Can't modify disk template and apply disk changes at the same time"
12375
12376     if self.op.disk_template:
12377       if instance.disk_template == self.op.disk_template:
12378         raise errors.OpPrereqError("Instance already has disk template %s" %
12379                                    instance.disk_template, errors.ECODE_INVAL)
12380
12381       if (instance.disk_template,
12382           self.op.disk_template) not in self._DISK_CONVERSIONS:
12383         raise errors.OpPrereqError("Unsupported disk template conversion from"
12384                                    " %s to %s" % (instance.disk_template,
12385                                                   self.op.disk_template),
12386                                    errors.ECODE_INVAL)
12387       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12388                           msg="cannot change disk template")
12389       if self.op.disk_template in constants.DTS_INT_MIRROR:
12390         if self.op.remote_node == pnode:
12391           raise errors.OpPrereqError("Given new secondary node %s is the same"
12392                                      " as the primary node of the instance" %
12393                                      self.op.remote_node, errors.ECODE_STATE)
12394         _CheckNodeOnline(self, self.op.remote_node)
12395         _CheckNodeNotDrained(self, self.op.remote_node)
12396         # FIXME: here we assume that the old instance type is DT_PLAIN
12397         assert instance.disk_template == constants.DT_PLAIN
12398         disks = [{constants.IDISK_SIZE: d.size,
12399                   constants.IDISK_VG: d.logical_id[0]}
12400                  for d in instance.disks]
12401         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12402         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12403
12404         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12405         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12406         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12407         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12408                                 ignore=self.op.ignore_ipolicy)
12409         if pnode_info.group != snode_info.group:
12410           self.LogWarning("The primary and secondary nodes are in two"
12411                           " different node groups; the disk parameters"
12412                           " from the first disk's node group will be"
12413                           " used")
12414
12415     # hvparams processing
12416     if self.op.hvparams:
12417       hv_type = instance.hypervisor
12418       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12419       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12420       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12421
12422       # local check
12423       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12424       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12425       self.hv_proposed = self.hv_new = hv_new # the new actual values
12426       self.hv_inst = i_hvdict # the new dict (without defaults)
12427     else:
12428       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12429                                               instance.hvparams)
12430       self.hv_new = self.hv_inst = {}
12431
12432     # beparams processing
12433     if self.op.beparams:
12434       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12435                                    use_none=True)
12436       objects.UpgradeBeParams(i_bedict)
12437       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12438       be_new = cluster.SimpleFillBE(i_bedict)
12439       self.be_proposed = self.be_new = be_new # the new actual values
12440       self.be_inst = i_bedict # the new dict (without defaults)
12441     else:
12442       self.be_new = self.be_inst = {}
12443       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12444     be_old = cluster.FillBE(instance)
12445
12446     # CPU param validation -- checking every time a parameter is
12447     # changed to cover all cases where either CPU mask or vcpus have
12448     # changed
12449     if (constants.BE_VCPUS in self.be_proposed and
12450         constants.HV_CPU_MASK in self.hv_proposed):
12451       cpu_list = \
12452         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12453       # Verify mask is consistent with number of vCPUs. Can skip this
12454       # test if only 1 entry in the CPU mask, which means same mask
12455       # is applied to all vCPUs.
12456       if (len(cpu_list) > 1 and
12457           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12458         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12459                                    " CPU mask [%s]" %
12460                                    (self.be_proposed[constants.BE_VCPUS],
12461                                     self.hv_proposed[constants.HV_CPU_MASK]),
12462                                    errors.ECODE_INVAL)
12463
12464       # Only perform this test if a new CPU mask is given
12465       if constants.HV_CPU_MASK in self.hv_new:
12466         # Calculate the largest CPU number requested
12467         max_requested_cpu = max(map(max, cpu_list))
12468         # Check that all of the instance's nodes have enough physical CPUs to
12469         # satisfy the requested CPU mask
12470         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12471                                 max_requested_cpu + 1, instance.hypervisor)
12472
12473     # osparams processing
12474     if self.op.osparams:
12475       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12476       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12477       self.os_inst = i_osdict # the new dict (without defaults)
12478     else:
12479       self.os_inst = {}
12480
12481     self.warn = []
12482
12483     #TODO(dynmem): do the appropriate check involving MINMEM
12484     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12485         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12486       mem_check_list = [pnode]
12487       if be_new[constants.BE_AUTO_BALANCE]:
12488         # either we changed auto_balance to yes or it was from before
12489         mem_check_list.extend(instance.secondary_nodes)
12490       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12491                                                   instance.hypervisor)
12492       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12493                                          [instance.hypervisor])
12494       pninfo = nodeinfo[pnode]
12495       msg = pninfo.fail_msg
12496       if msg:
12497         # Assume the primary node is unreachable and go ahead
12498         self.warn.append("Can't get info from primary node %s: %s" %
12499                          (pnode, msg))
12500       else:
12501         (_, _, (pnhvinfo, )) = pninfo.payload
12502         if not isinstance(pnhvinfo.get("memory_free", None), int):
12503           self.warn.append("Node data from primary node %s doesn't contain"
12504                            " free memory information" % pnode)
12505         elif instance_info.fail_msg:
12506           self.warn.append("Can't get instance runtime information: %s" %
12507                           instance_info.fail_msg)
12508         else:
12509           if instance_info.payload:
12510             current_mem = int(instance_info.payload["memory"])
12511           else:
12512             # Assume instance not running
12513             # (there is a slight race condition here, but it's not very
12514             # probable, and we have no other way to check)
12515             # TODO: Describe race condition
12516             current_mem = 0
12517           #TODO(dynmem): do the appropriate check involving MINMEM
12518           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12519                       pnhvinfo["memory_free"])
12520           if miss_mem > 0:
12521             raise errors.OpPrereqError("This change will prevent the instance"
12522                                        " from starting, due to %d MB of memory"
12523                                        " missing on its primary node" %
12524                                        miss_mem,
12525                                        errors.ECODE_NORES)
12526
12527       if be_new[constants.BE_AUTO_BALANCE]:
12528         for node, nres in nodeinfo.items():
12529           if node not in instance.secondary_nodes:
12530             continue
12531           nres.Raise("Can't get info from secondary node %s" % node,
12532                      prereq=True, ecode=errors.ECODE_STATE)
12533           (_, _, (nhvinfo, )) = nres.payload
12534           if not isinstance(nhvinfo.get("memory_free", None), int):
12535             raise errors.OpPrereqError("Secondary node %s didn't return free"
12536                                        " memory information" % node,
12537                                        errors.ECODE_STATE)
12538           #TODO(dynmem): do the appropriate check involving MINMEM
12539           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12540             raise errors.OpPrereqError("This change will prevent the instance"
12541                                        " from failover to its secondary node"
12542                                        " %s, due to not enough memory" % node,
12543                                        errors.ECODE_STATE)
12544
12545     if self.op.runtime_mem:
12546       remote_info = self.rpc.call_instance_info(instance.primary_node,
12547                                                 instance.name,
12548                                                 instance.hypervisor)
12549       remote_info.Raise("Error checking node %s" % instance.primary_node)
12550       if not remote_info.payload: # not running already
12551         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12552                                    errors.ECODE_STATE)
12553
12554       current_memory = remote_info.payload["memory"]
12555       if (not self.op.force and
12556            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12557             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12558         raise errors.OpPrereqError("Instance %s must have memory between %d"
12559                                    " and %d MB of memory unless --force is"
12560                                    " given" % (instance.name,
12561                                     self.be_proposed[constants.BE_MINMEM],
12562                                     self.be_proposed[constants.BE_MAXMEM]),
12563                                    errors.ECODE_INVAL)
12564
12565       if self.op.runtime_mem > current_memory:
12566         _CheckNodeFreeMemory(self, instance.primary_node,
12567                              "ballooning memory for instance %s" %
12568                              instance.name,
12569                              self.op.memory - current_memory,
12570                              instance.hypervisor)
12571
12572     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12573       raise errors.OpPrereqError("Disk operations not supported for"
12574                                  " diskless instances",
12575                                  errors.ECODE_INVAL)
12576
12577     def _PrepareNicCreate(_, params, private):
12578       self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12579       return (None, None)
12580
12581     def _PrepareNicMod(_, nic, params, private):
12582       self._PrepareNicModification(params, private, nic.ip,
12583                                    nic.nicparams, cluster, pnode)
12584       return None
12585
12586     # Verify NIC changes (operating on copy)
12587     nics = instance.nics[:]
12588     ApplyContainerMods("NIC", nics, None, self.nicmod,
12589                        _PrepareNicCreate, _PrepareNicMod, None)
12590     if len(nics) > constants.MAX_NICS:
12591       raise errors.OpPrereqError("Instance has too many network interfaces"
12592                                  " (%d), cannot add more" % constants.MAX_NICS,
12593                                  errors.ECODE_STATE)
12594
12595     # Verify disk changes (operating on a copy)
12596     disks = instance.disks[:]
12597     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12598     if len(disks) > constants.MAX_DISKS:
12599       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12600                                  " more" % constants.MAX_DISKS,
12601                                  errors.ECODE_STATE)
12602
12603     if self.op.offline is not None:
12604       if self.op.offline:
12605         msg = "can't change to offline"
12606       else:
12607         msg = "can't change to online"
12608       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12609
12610     # Pre-compute NIC changes (necessary to use result in hooks)
12611     self._nic_chgdesc = []
12612     if self.nicmod:
12613       # Operate on copies as this is still in prereq
12614       nics = [nic.Copy() for nic in instance.nics]
12615       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12616                          self._CreateNewNic, self._ApplyNicMods, None)
12617       self._new_nics = nics
12618     else:
12619       self._new_nics = None
12620
12621   def _ConvertPlainToDrbd(self, feedback_fn):
12622     """Converts an instance from plain to drbd.
12623
12624     """
12625     feedback_fn("Converting template to drbd")
12626     instance = self.instance
12627     pnode = instance.primary_node
12628     snode = self.op.remote_node
12629
12630     assert instance.disk_template == constants.DT_PLAIN
12631
12632     # create a fake disk info for _GenerateDiskTemplate
12633     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12634                   constants.IDISK_VG: d.logical_id[0]}
12635                  for d in instance.disks]
12636     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12637                                       instance.name, pnode, [snode],
12638                                       disk_info, None, None, 0, feedback_fn,
12639                                       self.diskparams)
12640     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12641                                         self.diskparams)
12642     info = _GetInstanceInfoText(instance)
12643     feedback_fn("Creating additional volumes...")
12644     # first, create the missing data and meta devices
12645     for disk in anno_disks:
12646       # unfortunately this is... not too nice
12647       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12648                             info, True)
12649       for child in disk.children:
12650         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12651     # at this stage, all new LVs have been created, we can rename the
12652     # old ones
12653     feedback_fn("Renaming original volumes...")
12654     rename_list = [(o, n.children[0].logical_id)
12655                    for (o, n) in zip(instance.disks, new_disks)]
12656     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12657     result.Raise("Failed to rename original LVs")
12658
12659     feedback_fn("Initializing DRBD devices...")
12660     # all child devices are in place, we can now create the DRBD devices
12661     for disk in anno_disks:
12662       for node in [pnode, snode]:
12663         f_create = node == pnode
12664         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12665
12666     # at this point, the instance has been modified
12667     instance.disk_template = constants.DT_DRBD8
12668     instance.disks = new_disks
12669     self.cfg.Update(instance, feedback_fn)
12670
12671     # Release node locks while waiting for sync
12672     _ReleaseLocks(self, locking.LEVEL_NODE)
12673
12674     # disks are created, waiting for sync
12675     disk_abort = not _WaitForSync(self, instance,
12676                                   oneshot=not self.op.wait_for_sync)
12677     if disk_abort:
12678       raise errors.OpExecError("There are some degraded disks for"
12679                                " this instance, please cleanup manually")
12680
12681     # Node resource locks will be released by caller
12682
12683   def _ConvertDrbdToPlain(self, feedback_fn):
12684     """Converts an instance from drbd to plain.
12685
12686     """
12687     instance = self.instance
12688
12689     assert len(instance.secondary_nodes) == 1
12690     assert instance.disk_template == constants.DT_DRBD8
12691
12692     pnode = instance.primary_node
12693     snode = instance.secondary_nodes[0]
12694     feedback_fn("Converting template to plain")
12695
12696     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12697     new_disks = [d.children[0] for d in instance.disks]
12698
12699     # copy over size and mode
12700     for parent, child in zip(old_disks, new_disks):
12701       child.size = parent.size
12702       child.mode = parent.mode
12703
12704     # this is a DRBD disk, return its port to the pool
12705     # NOTE: this must be done right before the call to cfg.Update!
12706     for disk in old_disks:
12707       tcp_port = disk.logical_id[2]
12708       self.cfg.AddTcpUdpPort(tcp_port)
12709
12710     # update instance structure
12711     instance.disks = new_disks
12712     instance.disk_template = constants.DT_PLAIN
12713     self.cfg.Update(instance, feedback_fn)
12714
12715     # Release locks in case removing disks takes a while
12716     _ReleaseLocks(self, locking.LEVEL_NODE)
12717
12718     feedback_fn("Removing volumes on the secondary node...")
12719     for disk in old_disks:
12720       self.cfg.SetDiskID(disk, snode)
12721       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12722       if msg:
12723         self.LogWarning("Could not remove block device %s on node %s,"
12724                         " continuing anyway: %s", disk.iv_name, snode, msg)
12725
12726     feedback_fn("Removing unneeded volumes on the primary node...")
12727     for idx, disk in enumerate(old_disks):
12728       meta = disk.children[1]
12729       self.cfg.SetDiskID(meta, pnode)
12730       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12731       if msg:
12732         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12733                         " continuing anyway: %s", idx, pnode, msg)
12734
12735   def _CreateNewDisk(self, idx, params, _):
12736     """Creates a new disk.
12737
12738     """
12739     instance = self.instance
12740
12741     # add a new disk
12742     if instance.disk_template in constants.DTS_FILEBASED:
12743       (file_driver, file_path) = instance.disks[0].logical_id
12744       file_path = os.path.dirname(file_path)
12745     else:
12746       file_driver = file_path = None
12747
12748     disk = \
12749       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12750                             instance.primary_node, instance.secondary_nodes,
12751                             [params], file_path, file_driver, idx,
12752                             self.Log, self.diskparams)[0]
12753
12754     info = _GetInstanceInfoText(instance)
12755
12756     logging.info("Creating volume %s for instance %s",
12757                  disk.iv_name, instance.name)
12758     # Note: this needs to be kept in sync with _CreateDisks
12759     #HARDCODE
12760     for node in instance.all_nodes:
12761       f_create = (node == instance.primary_node)
12762       try:
12763         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12764       except errors.OpExecError, err:
12765         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12766                         disk.iv_name, disk, node, err)
12767
12768     return (disk, [
12769       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12770       ])
12771
12772   @staticmethod
12773   def _ModifyDisk(idx, disk, params, _):
12774     """Modifies a disk.
12775
12776     """
12777     disk.mode = params[constants.IDISK_MODE]
12778
12779     return [
12780       ("disk.mode/%d" % idx, disk.mode),
12781       ]
12782
12783   def _RemoveDisk(self, idx, root, _):
12784     """Removes a disk.
12785
12786     """
12787     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12788     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12789       self.cfg.SetDiskID(disk, node)
12790       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12791       if msg:
12792         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12793                         " continuing anyway", idx, node, msg)
12794
12795     # if this is a DRBD disk, return its port to the pool
12796     if root.dev_type in constants.LDS_DRBD:
12797       self.cfg.AddTcpUdpPort(root.logical_id[2])
12798
12799   @staticmethod
12800   def _CreateNewNic(idx, params, private):
12801     """Creates data structure for a new network interface.
12802
12803     """
12804     mac = params[constants.INIC_MAC]
12805     ip = params.get(constants.INIC_IP, None)
12806     nicparams = private.params
12807
12808     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12809       ("nic.%d" % idx,
12810        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12811        (mac, ip, private.filled[constants.NIC_MODE],
12812        private.filled[constants.NIC_LINK])),
12813       ])
12814
12815   @staticmethod
12816   def _ApplyNicMods(idx, nic, params, private):
12817     """Modifies a network interface.
12818
12819     """
12820     changes = []
12821
12822     for key in [constants.INIC_MAC, constants.INIC_IP]:
12823       if key in params:
12824         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12825         setattr(nic, key, params[key])
12826
12827     if private.params:
12828       nic.nicparams = private.params
12829
12830       for (key, val) in params.items():
12831         changes.append(("nic.%s/%d" % (key, idx), val))
12832
12833     return changes
12834
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    @param feedback_fn: function used for reporting warnings and progress
    @return: list of (name, value) pairs describing the changes which were
        applied

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be held exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        # Verify that the locks for all involved nodes, including the new
        # secondary node (if any), are held
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # The table holds plain functions, hence self is passed explicitly
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Whatever went wrong, release the DRBD minors of the instance
        # before passing the exception on
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    # (the new NIC list and its description were computed in CheckPrereq)
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Write the modified instance object to the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12945
  # Supported disk template conversions, mapping (current template, new
  # template) to the function doing the conversion. The values are the plain
  # functions defined in the class body (not bound methods), so callers have
  # to pass the LU instance explicitly as first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12950
12951
12952 class LUInstanceChangeGroup(LogicalUnit):
12953   HPATH = "instance-change-group"
12954   HTYPE = constants.HTYPE_INSTANCE
12955   REQ_BGL = False
12956
12957   def ExpandNames(self):
12958     self.share_locks = _ShareAll()
12959     self.needed_locks = {
12960       locking.LEVEL_NODEGROUP: [],
12961       locking.LEVEL_NODE: [],
12962       }
12963
12964     self._ExpandAndLockInstance()
12965
12966     if self.op.target_groups:
12967       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12968                                   self.op.target_groups)
12969     else:
12970       self.req_target_uuids = None
12971
12972     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12973
12974   def DeclareLocks(self, level):
12975     if level == locking.LEVEL_NODEGROUP:
12976       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12977
12978       if self.req_target_uuids:
12979         lock_groups = set(self.req_target_uuids)
12980
12981         # Lock all groups used by instance optimistically; this requires going
12982         # via the node before it's locked, requiring verification later on
12983         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12984         lock_groups.update(instance_groups)
12985       else:
12986         # No target groups, need to lock all of them
12987         lock_groups = locking.ALL_SET
12988
12989       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12990
12991     elif level == locking.LEVEL_NODE:
12992       if self.req_target_uuids:
12993         # Lock all nodes used by instances
12994         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12995         self._LockInstancesNodes()
12996
12997         # Lock all nodes in all potential target groups
12998         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12999                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13000         member_nodes = [node_name
13001                         for group in lock_groups
13002                         for node_name in self.cfg.GetNodeGroup(group).members]
13003         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13004       else:
13005         # Lock all nodes as all groups are potential targets
13006         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13007
13008   def CheckPrereq(self):
13009     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13010     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13011     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13012
13013     assert (self.req_target_uuids is None or
13014             owned_groups.issuperset(self.req_target_uuids))
13015     assert owned_instances == set([self.op.instance_name])
13016
13017     # Get instance information
13018     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13019
13020     # Check if node groups for locked instance are still correct
13021     assert owned_nodes.issuperset(self.instance.all_nodes), \
13022       ("Instance %s's nodes changed while we kept the lock" %
13023        self.op.instance_name)
13024
13025     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13026                                            owned_groups)
13027
13028     if self.req_target_uuids:
13029       # User requested specific target groups
13030       self.target_uuids = frozenset(self.req_target_uuids)
13031     else:
13032       # All groups except those used by the instance are potential targets
13033       self.target_uuids = owned_groups - inst_groups
13034
13035     conflicting_groups = self.target_uuids & inst_groups
13036     if conflicting_groups:
13037       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13038                                  " used by the instance '%s'" %
13039                                  (utils.CommaJoin(conflicting_groups),
13040                                   self.op.instance_name),
13041                                  errors.ECODE_INVAL)
13042
13043     if not self.target_uuids:
13044       raise errors.OpPrereqError("There are no possible target groups",
13045                                  errors.ECODE_INVAL)
13046
13047   def BuildHooksEnv(self):
13048     """Build hooks env.
13049
13050     """
13051     assert self.target_uuids
13052
13053     env = {
13054       "TARGET_GROUPS": " ".join(self.target_uuids),
13055       }
13056
13057     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13058
13059     return env
13060
13061   def BuildHooksNodes(self):
13062     """Build hooks nodes.
13063
13064     """
13065     mn = self.cfg.GetMasterNode()
13066     return ([mn], [mn])
13067
13068   def Exec(self, feedback_fn):
13069     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13070
13071     assert instances == [self.op.instance_name], "Instance not locked"
13072
13073     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13074                      instances=instances, target_groups=list(self.target_uuids))
13075
13076     ial.Run(self.op.iallocator)
13077
13078     if not ial.success:
13079       raise errors.OpPrereqError("Can't compute solution for changing group of"
13080                                  " instance '%s' using iallocator '%s': %s" %
13081                                  (self.op.instance_name, self.op.iallocator,
13082                                   ial.info),
13083                                  errors.ECODE_NORES)
13084
13085     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13086
13087     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13088                  " instance '%s'", len(jobs), self.op.instance_name)
13089
13090     return ResultWithJobs(jobs)
13091
13092
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  The actual work is delegated to an L{_ExportQuery} object; this LU only
  converts the result into a dictionary indexed by node name.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the export query object for the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    self.expq = _ExportQuery(node_filter, ["node", "export"],
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand names and declare its locks.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward the lock declaration to the query object.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Compute the list of exports per node.

    @rtype: dict
    @return: dictionary with node names as keys; the value is the list of
      export names reported for the node, or C{False} if the query returned
      C{None} as the export name for that node

    """
    exports = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is not None:
        exports.setdefault(node, []).append(expname)
      else:
        # A missing export name marks a node without a usable answer
        exports[node] = False

    return exports
13119
13120
class _ExportQuery(_QueryBase):
  """Query implementation for instance exports.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Compute the wanted nodes and, if locking is requested, their locks.

    @param lu: the logical unit on whose behalf the query is run

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    # All locks are acquired in shared mode
    lu.share_locks = _ShareAll()
    lu.needed_locks = {
      locking.LEVEL_NODE: self.wanted,
      }

  def DeclareLocks(self, lu, level):
    """Declare locks for the given level; nothing to do for this query.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @param lu: the logical unit on whose behalf the query is run
    @rtype: list
    @return: list of C{(node, export_name)} tuples; for nodes whose export
      list call failed a single C{(node, None)} tuple is returned

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    export_lists = lu.rpc.call_export_list(nodes)

    data = []

    for (node, nres) in export_lists.items():
      if not nres.fail_msg:
        for expname in nres.payload:
          data.append((node, expname))
      else:
        data.append((node, None))

    return data
13169
13170
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object, requires its primary node to be online and
    fetches the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress
    @return: C{None} unless the export mode is remote; for remote exports a
      dictionary with the keys C{handshake}, C{x509_key_name} and C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # Key name and certificate are authenticated using the cluster domain
    # secret and the salt generated above
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
13220
13221
13222 class LUBackupExport(LogicalUnit):
13223   """Export an instance to an image in the cluster.
13224
13225   """
13226   HPATH = "instance-export"
13227   HTYPE = constants.HTYPE_INSTANCE
13228   REQ_BGL = False
13229
13230   def CheckArguments(self):
13231     """Check the arguments.
13232
13233     """
13234     self.x509_key_name = self.op.x509_key_name
13235     self.dest_x509_ca_pem = self.op.destination_x509_ca
13236
13237     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13238       if not self.x509_key_name:
13239         raise errors.OpPrereqError("Missing X509 key name for encryption",
13240                                    errors.ECODE_INVAL)
13241
13242       if not self.dest_x509_ca_pem:
13243         raise errors.OpPrereqError("Missing destination X509 CA",
13244                                    errors.ECODE_INVAL)
13245
13246   def ExpandNames(self):
13247     self._ExpandAndLockInstance()
13248
13249     # Lock all nodes for local exports
13250     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13251       # FIXME: lock only instance primary and destination node
13252       #
13253       # Sad but true, for now we have do lock all nodes, as we don't know where
13254       # the previous export might be, and in this LU we search for it and
13255       # remove it from its current node. In the future we could fix this by:
13256       #  - making a tasklet to search (share-lock all), then create the
13257       #    new one, then one to remove, after
13258       #  - removing the removal operation altogether
13259       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13260
13261   def DeclareLocks(self, level):
13262     """Last minute lock declaration."""
13263     # All nodes are locked anyway, so nothing to do here.
13264
13265   def BuildHooksEnv(self):
13266     """Build hooks env.
13267
13268     This will run on the master, primary node and target node.
13269
13270     """
13271     env = {
13272       "EXPORT_MODE": self.op.mode,
13273       "EXPORT_NODE": self.op.target_node,
13274       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13275       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13276       # TODO: Generic function for boolean env variables
13277       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13278       }
13279
13280     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13281
13282     return env
13283
13284   def BuildHooksNodes(self):
13285     """Build hooks nodes.
13286
13287     """
13288     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13289
13290     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13291       nl.append(self.op.target_node)
13292
13293     return (nl, nl)
13294
13295   def CheckPrereq(self):
13296     """Check prerequisites.
13297
13298     This checks that the instance and node names are valid.
13299
13300     """
13301     instance_name = self.op.instance_name
13302
13303     self.instance = self.cfg.GetInstanceInfo(instance_name)
13304     assert self.instance is not None, \
13305           "Cannot retrieve locked instance %s" % self.op.instance_name
13306     _CheckNodeOnline(self, self.instance.primary_node)
13307
13308     if (self.op.remove_instance and
13309         self.instance.admin_state == constants.ADMINST_UP and
13310         not self.op.shutdown):
13311       raise errors.OpPrereqError("Can not remove instance without shutting it"
13312                                  " down before")
13313
13314     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13315       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13316       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13317       assert self.dst_node is not None
13318
13319       _CheckNodeOnline(self, self.dst_node.name)
13320       _CheckNodeNotDrained(self, self.dst_node.name)
13321
13322       self._cds = None
13323       self.dest_disk_info = None
13324       self.dest_x509_ca = None
13325
13326     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13327       self.dst_node = None
13328
13329       if len(self.op.target_node) != len(self.instance.disks):
13330         raise errors.OpPrereqError(("Received destination information for %s"
13331                                     " disks, but instance %s has %s disks") %
13332                                    (len(self.op.target_node), instance_name,
13333                                     len(self.instance.disks)),
13334                                    errors.ECODE_INVAL)
13335
13336       cds = _GetClusterDomainSecret()
13337
13338       # Check X509 key name
13339       try:
13340         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13341       except (TypeError, ValueError), err:
13342         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13343
13344       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13345         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13346                                    errors.ECODE_INVAL)
13347
13348       # Load and verify CA
13349       try:
13350         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13351       except OpenSSL.crypto.Error, err:
13352         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13353                                    (err, ), errors.ECODE_INVAL)
13354
13355       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13356       if errcode is not None:
13357         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13358                                    (msg, ), errors.ECODE_INVAL)
13359
13360       self.dest_x509_ca = cert
13361
13362       # Verify target information
13363       disk_info = []
13364       for idx, disk_data in enumerate(self.op.target_node):
13365         try:
13366           (host, port, magic) = \
13367             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13368         except errors.GenericError, err:
13369           raise errors.OpPrereqError("Target info for disk %s: %s" %
13370                                      (idx, err), errors.ECODE_INVAL)
13371
13372         disk_info.append((host, port, magic))
13373
13374       assert len(disk_info) == len(self.op.target_node)
13375       self.dest_disk_info = disk_info
13376
13377     else:
13378       raise errors.ProgrammerError("Unhandled export mode %r" %
13379                                    self.op.mode)
13380
13381     # instance disk type verification
13382     # TODO: Implement export support for file-based disks
13383     for disk in self.instance.disks:
13384       if disk.dev_type == constants.LD_FILE:
13385         raise errors.OpPrereqError("Export not supported for instances with"
13386                                    " file-based disks", errors.ECODE_INVAL)
13387
13388   def _CleanupExports(self, feedback_fn):
13389     """Removes exports of current instance from all other nodes.
13390
13391     If an instance in a cluster with nodes A..D was exported to node C, its
13392     exports will be removed from the nodes A, B and D.
13393
13394     """
13395     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13396
13397     nodelist = self.cfg.GetNodeList()
13398     nodelist.remove(self.dst_node.name)
13399
13400     # on one-node clusters nodelist will be empty after the removal
13401     # if we proceed the backup would be removed because OpBackupQuery
13402     # substitutes an empty list with the full cluster node list.
13403     iname = self.instance.name
13404     if nodelist:
13405       feedback_fn("Removing old exports for instance %s" % iname)
13406       exportlist = self.rpc.call_export_list(nodelist)
13407       for node in exportlist:
13408         if exportlist[node].fail_msg:
13409           continue
13410         if iname in exportlist[node].payload:
13411           msg = self.rpc.call_export_remove(node, iname).fail_msg
13412           if msg:
13413             self.LogWarning("Could not remove older export for instance %s"
13414                             " on node %s: %s", iname, node, msg)
13415
13416   def Exec(self, feedback_fn):
13417     """Export an instance to an image in the cluster.
13418
13419     """
13420     assert self.op.mode in constants.EXPORT_MODES
13421
13422     instance = self.instance
13423     src_node = instance.primary_node
13424
13425     if self.op.shutdown:
13426       # shutdown the instance, but not the disks
13427       feedback_fn("Shutting down instance %s" % instance.name)
13428       result = self.rpc.call_instance_shutdown(src_node, instance,
13429                                                self.op.shutdown_timeout)
13430       # TODO: Maybe ignore failures if ignore_remove_failures is set
13431       result.Raise("Could not shutdown instance %s on"
13432                    " node %s" % (instance.name, src_node))
13433
13434     # set the disks ID correctly since call_instance_start needs the
13435     # correct drbd minor to create the symlinks
13436     for disk in instance.disks:
13437       self.cfg.SetDiskID(disk, src_node)
13438
13439     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13440
13441     if activate_disks:
13442       # Activate the instance disks if we'exporting a stopped instance
13443       feedback_fn("Activating disks for %s" % instance.name)
13444       _StartInstanceDisks(self, instance, None)
13445
13446     try:
13447       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13448                                                      instance)
13449
13450       helper.CreateSnapshots()
13451       try:
13452         if (self.op.shutdown and
13453             instance.admin_state == constants.ADMINST_UP and
13454             not self.op.remove_instance):
13455           assert not activate_disks
13456           feedback_fn("Starting instance %s" % instance.name)
13457           result = self.rpc.call_instance_start(src_node,
13458                                                 (instance, None, None), False)
13459           msg = result.fail_msg
13460           if msg:
13461             feedback_fn("Failed to start instance: %s" % msg)
13462             _ShutdownInstanceDisks(self, instance)
13463             raise errors.OpExecError("Could not start instance: %s" % msg)
13464
13465         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13466           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13467         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13468           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13469           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13470
13471           (key_name, _, _) = self.x509_key_name
13472
13473           dest_ca_pem = \
13474             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13475                                             self.dest_x509_ca)
13476
13477           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13478                                                      key_name, dest_ca_pem,
13479                                                      timeouts)
13480       finally:
13481         helper.Cleanup()
13482
13483       # Check for backwards compatibility
13484       assert len(dresults) == len(instance.disks)
13485       assert compat.all(isinstance(i, bool) for i in dresults), \
13486              "Not all results are boolean: %r" % dresults
13487
13488     finally:
13489       if activate_disks:
13490         feedback_fn("Deactivating disks for %s" % instance.name)
13491         _ShutdownInstanceDisks(self, instance)
13492
13493     if not (compat.all(dresults) and fin_resu):
13494       failures = []
13495       if not fin_resu:
13496         failures.append("export finalization")
13497       if not compat.all(dresults):
13498         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13499                                if not dsk)
13500         failures.append("disk export: disk(s) %s" % fdsk)
13501
13502       raise errors.OpExecError("Export failed, errors in %s" %
13503                                utils.CommaJoin(failures))
13504
13505     # At this point, the export was successful, we can cleanup/finish
13506
13507     # Remove instance if requested
13508     if self.op.remove_instance:
13509       feedback_fn("Removing instance %s" % instance.name)
13510       _RemoveInstance(self, feedback_fn, instance,
13511                       self.op.ignore_remove_failures)
13512
13513     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13514       self._CleanupExports(feedback_fn)
13515
13516     return fin_resu, dresults
13517
13518
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Nodes which can not be queried are skipped with a warning and failures
    to remove an export are only logged.

    @param feedback_fn: function used to tell the user that no export was
      found for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    unknown_instance = not expanded_name
    if unknown_instance:
      iname = self.op.instance_name
    else:
      iname = expanded_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)

    found = False

    for (node, nres) in exportlist.items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        query_msg)
        continue

      if iname not in nres.payload:
        continue

      found = True
      remove_msg = self.rpc.call_export_remove(node, iname).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", iname, node, remove_msg)

    if unknown_instance and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13564
13565
13566 class LUGroupAdd(LogicalUnit):
13567   """Logical unit for creating node groups.
13568
13569   """
13570   HPATH = "group-add"
13571   HTYPE = constants.HTYPE_GROUP
13572   REQ_BGL = False
13573
13574   def ExpandNames(self):
13575     # We need the new group's UUID here so that we can create and acquire the
13576     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13577     # that it should not check whether the UUID exists in the configuration.
13578     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13579     self.needed_locks = {}
13580     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13581
13582   def CheckPrereq(self):
13583     """Check prerequisites.
13584
13585     This checks that the given group name is not an existing node group
13586     already.
13587
13588     """
13589     try:
13590       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13591     except errors.OpPrereqError:
13592       pass
13593     else:
13594       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13595                                  " node group (UUID: %s)" %
13596                                  (self.op.group_name, existing_uuid),
13597                                  errors.ECODE_EXISTS)
13598
13599     if self.op.ndparams:
13600       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13601
13602     if self.op.hv_state:
13603       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13604     else:
13605       self.new_hv_state = None
13606
13607     if self.op.disk_state:
13608       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13609     else:
13610       self.new_disk_state = None
13611
13612     if self.op.diskparams:
13613       for templ in constants.DISK_TEMPLATES:
13614         if templ in self.op.diskparams:
13615           utils.ForceDictType(self.op.diskparams[templ],
13616                               constants.DISK_DT_TYPES)
13617       self.new_diskparams = self.op.diskparams
13618       try:
13619         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13620       except errors.OpPrereqError, err:
13621         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13622                                    errors.ECODE_INVAL)
13623     else:
13624       self.new_diskparams = {}
13625
13626     if self.op.ipolicy:
13627       cluster = self.cfg.GetClusterInfo()
13628       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13629       try:
13630         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13631       except errors.ConfigurationError, err:
13632         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13633                                    errors.ECODE_INVAL)
13634
13635   def BuildHooksEnv(self):
13636     """Build hooks env.
13637
13638     """
13639     return {
13640       "GROUP_NAME": self.op.group_name,
13641       }
13642
13643   def BuildHooksNodes(self):
13644     """Build hooks nodes.
13645
13646     """
13647     mn = self.cfg.GetMasterNode()
13648     return ([mn], [mn])
13649
13650   def Exec(self, feedback_fn):
13651     """Add the node group to the cluster.
13652
13653     """
13654     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13655                                   uuid=self.group_uuid,
13656                                   alloc_policy=self.op.alloc_policy,
13657                                   ndparams=self.op.ndparams,
13658                                   diskparams=self.new_diskparams,
13659                                   ipolicy=self.op.ipolicy,
13660                                   hv_state_static=self.new_hv_state,
13661                                   disk_state_static=self.new_disk_state)
13662
13663     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13664     del self.remove_locks[locking.LEVEL_NODEGROUP]
13665
13666
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group and node names and declare the initial locks.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    group_locks = set([self.group_uuid])

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: group_locks,
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Add the current groups of the affected nodes to the group locks.

    @param level: the locking level for which locks are being declared

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

    # Try to get all affected nodes' groups without having the group or node
    # lock yet. Needs verification later in the code flow.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

    self.needed_locks[locking.LEVEL_NODEGROUP].update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned group locks still match the groups of the nodes
    and checks whether the assignment would split instances across groups.

    @raise errors.OpExecError: if the nodes changed groups after the locks
      were acquired, if the target group can not be retrieved or if
      instances would be newly split and C{force} was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: feedback function (not used by this method)

    """
    target = self.group_uuid
    mods = []
    for node_name in self.op.nodes:
      mods.append((node_name, target))

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments which actually move a node to another group matter
    reassigned = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        reassigned[node] = group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set()
      groups_after = set()
      for node in inst_nodes:
        cur_group = node_data[node].group
        groups_before.add(cur_group)
        groups_after.add(reassigned.get(node, cur_group))

      # An instance is split if its nodes are in more than one group
      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
13798
13799
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve the requested groups into a list of group UUIDs.

    Without explicit names all groups are selected, ordered by nice-sorted
    name. Otherwise each requested item may be either a UUID or a name.

    @raise errors.OpPrereqError: if a requested item matches neither a known
      UUID nor a known name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    uuid_by_name = {}
    for grp in self._all_groups.values():
      uuid_by_name[grp.name] = grp.uuid

    if not self.names:
      self.wanted = [uuid_by_name[grp_name]
                     for grp_name in utils.NiceSort(uuid_by_name.keys())]
      return

    # Accept the requested items to be either names or UUIDs.
    unknown = []
    self.wanted = []
    known_uuids = frozenset(self._all_groups.keys())

    for ident in self.names:
      if ident in known_uuids:
        self.wanted.append(ident)
      elif ident in uuid_by_name:
        self.wanted.append(uuid_by_name[ident])
      else:
        unknown.append(ident)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Nothing to declare; ExpandNames requests no locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_insts = query.GQ_INST in self.requested_data

    nodes_by_group = None
    insts_by_group = None

    # The group->[nodes] map (GQ_NODE) only needs GetAllNodesInfo(). The
    # group->[instances] map (GQ_INST) has to go through instance->node, so
    # nodes are processed even when only instance data was requested.
    if want_nodes or want_insts:
      node_infos = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in node_infos.values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

      if want_insts:
        inst_infos = lu.cfg.GetAllInstancesInfo()
        insts_by_group = dict((uuid, []) for uuid in self.wanted)

        for inst in inst_infos.values():
          pnode = inst.primary_node
          if pnode not in group_of_node:
            continue
          insts_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]
    with_diskparams = query.GQ_DISKPARAMS in self.requested_data

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, insts_by_group,
                                with_diskparams)
13878
13879
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} helper object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query helper from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper resolve the requested groups.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward lock declaration to the query helper.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its result in the old-style format.

    """
    return self.gq.OldStyleQuery(self)
13898
13899
13900 class LUGroupSetParams(LogicalUnit):
13901   """Modifies the parameters of a node group.
13902
13903   """
13904   HPATH = "group-modify"
13905   HTYPE = constants.HTYPE_GROUP
13906   REQ_BGL = False
13907
13908   def CheckArguments(self):
13909     all_changes = [
13910       self.op.ndparams,
13911       self.op.diskparams,
13912       self.op.alloc_policy,
13913       self.op.hv_state,
13914       self.op.disk_state,
13915       self.op.ipolicy,
13916       ]
13917
13918     if all_changes.count(None) == len(all_changes):
13919       raise errors.OpPrereqError("Please pass at least one modification",
13920                                  errors.ECODE_INVAL)
13921
13922   def ExpandNames(self):
13923     # This raises errors.OpPrereqError on its own:
13924     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13925
13926     self.needed_locks = {
13927       locking.LEVEL_INSTANCE: [],
13928       locking.LEVEL_NODEGROUP: [self.group_uuid],
13929       }
13930
13931     self.share_locks[locking.LEVEL_INSTANCE] = 1
13932
13933   def DeclareLocks(self, level):
13934     if level == locking.LEVEL_INSTANCE:
13935       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13936
13937       # Lock instances optimistically, needs verification once group lock has
13938       # been acquired
13939       self.needed_locks[locking.LEVEL_INSTANCE] = \
13940           self.cfg.GetNodeGroupInstances(self.group_uuid)
13941
13942   @staticmethod
13943   def _UpdateAndVerifyDiskParams(old, new):
13944     """Updates and verifies disk parameters.
13945
13946     """
13947     new_params = _GetUpdatedParams(old, new)
13948     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13949     return new_params
13950
13951   def CheckPrereq(self):
13952     """Check prerequisites.
13953
13954     """
13955     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13956
13957     # Check if locked instances are still correct
13958     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13959
13960     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13961     cluster = self.cfg.GetClusterInfo()
13962
13963     if self.group is None:
13964       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13965                                (self.op.group_name, self.group_uuid))
13966
13967     if self.op.ndparams:
13968       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13969       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13970       self.new_ndparams = new_ndparams
13971
13972     if self.op.diskparams:
13973       diskparams = self.group.diskparams
13974       uavdp = self._UpdateAndVerifyDiskParams
13975       # For each disktemplate subdict update and verify the values
13976       new_diskparams = dict((dt,
13977                              uavdp(diskparams.get(dt, {}),
13978                                    self.op.diskparams[dt]))
13979                             for dt in constants.DISK_TEMPLATES
13980                             if dt in self.op.diskparams)
13981       # As we've all subdicts of diskparams ready, lets merge the actual
13982       # dict with all updated subdicts
13983       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13984       try:
13985         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13986       except errors.OpPrereqError, err:
13987         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13988                                    errors.ECODE_INVAL)
13989
13990     if self.op.hv_state:
13991       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13992                                                  self.group.hv_state_static)
13993
13994     if self.op.disk_state:
13995       self.new_disk_state = \
13996         _MergeAndVerifyDiskState(self.op.disk_state,
13997                                  self.group.disk_state_static)
13998
13999     if self.op.ipolicy:
14000       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14001                                             self.op.ipolicy,
14002                                             group_policy=True)
14003
14004       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14005       inst_filter = lambda inst: inst.name in owned_instances
14006       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14007       violations = \
14008           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14009                                                                self.group),
14010                                         new_ipolicy, instances)
14011
14012       if violations:
14013         self.LogWarning("After the ipolicy change the following instances"
14014                         " violate them: %s",
14015                         utils.CommaJoin(violations))
14016
14017   def BuildHooksEnv(self):
14018     """Build hooks env.
14019
14020     """
14021     return {
14022       "GROUP_NAME": self.op.group_name,
14023       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14024       }
14025
14026   def BuildHooksNodes(self):
14027     """Build hooks nodes.
14028
14029     """
14030     mn = self.cfg.GetMasterNode()
14031     return ([mn], [mn])
14032
14033   def Exec(self, feedback_fn):
14034     """Modifies the node group.
14035
14036     """
14037     result = []
14038
14039     if self.op.ndparams:
14040       self.group.ndparams = self.new_ndparams
14041       result.append(("ndparams", str(self.group.ndparams)))
14042
14043     if self.op.diskparams:
14044       self.group.diskparams = self.new_diskparams
14045       result.append(("diskparams", str(self.group.diskparams)))
14046
14047     if self.op.alloc_policy:
14048       self.group.alloc_policy = self.op.alloc_policy
14049
14050     if self.op.hv_state:
14051       self.group.hv_state_static = self.new_hv_state
14052
14053     if self.op.disk_state:
14054       self.group.disk_state_static = self.new_disk_state
14055
14056     if self.op.ipolicy:
14057       self.group.ipolicy = self.new_ipolicy
14058
14059     self.cfg.Update(self.group, feedback_fn)
14060     return result
14061
14062
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and request its lock.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The group must be empty (i.e. contain no nodes) and must not be the
    only group of the cluster.

    """
    # Collect the nodes still assigned to the group; there must be none.
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # Removing the only group would leave the cluster group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
14128
14129
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and request its lock.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on all other nodes belonging to the
    group being renamed.

    """
    master = self.cfg.GetMasterNode()

    other_nodes = self.cfg.GetAllNodesInfo()
    # The master always comes first, do not list it a second time
    other_nodes.pop(master, None)

    hook_nodes = [master]
    for node in other_nodes.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
14197
14198
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating the instances of a node group.

  The new placement is computed by an iallocator run in change-group mode;
  the LU itself only returns the resulting jobs.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group names and prepare the (shared) lock declarations.

    @raise errors.OpPrereqError: if the group to be evacuated is also listed
      as one of the target groups

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    # The actual lock names for each level are computed in DeclareLocks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    Each level builds on the locks already owned at the previously handled
    levels: group locks are derived from the owned instance locks and node
    locks from the owned group locks.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically acquired instance and group locks still
    match the configuration and determines the list of target groups.

    @raise errors.OpPrereqError: if no target groups were requested and no
      other group is available as target

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on the members of the evacuated group.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Run the iallocator and return the evacuation jobs.

    @raise errors.OpPrereqError: if the iallocator run was not successful
    @return: a L{ResultWithJobs} wrapping the jobs built from the iallocator
      result

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    # Ask the iallocator to move all locked instances to the target groups
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
14345
14346
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  Abstract parent of all the tags LUs: it computes the lock needed for the
  tagged object and looks up the object itself.

  """
  def ExpandNames(self):
    """Expand the object name and declare the matching lock.

    Kinds other than node, instance and node group get no lock here.

    """
    self.group_uuid = None
    self.needed_locks = {}

    # Defaults used for kinds without a per-object lock
    lock_level = None
    lock_name = None

    kind = self.op.kind

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODE, self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_INSTANCE, self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODEGROUP, self.group_uuid)

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object the tags belong to in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not a known one

    """
    kind = self.op.kind
    cfg = self.cfg

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
14394
14395
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks as in the parent class, but in shared mode.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, shared locks are enough
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    tags = self.target.GetTags()
    return list(tags)
14413
14414
14415 class LUTagsSearch(NoHooksLU):
14416   """Searches the tags for a given pattern.
14417
14418   """
14419   REQ_BGL = False
14420
14421   def ExpandNames(self):
14422     self.needed_locks = {}
14423
14424   def CheckPrereq(self):
14425     """Check prerequisites.
14426
14427     This checks the pattern passed for validity by compiling it.
14428
14429     """
14430     try:
14431       self.re = re.compile(self.op.pattern)
14432     except re.error, err:
14433       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14434                                  (self.op.pattern, err), errors.ECODE_INVAL)
14435
14436   def Exec(self, feedback_fn):
14437     """Returns the tag list.
14438
14439     """
14440     cfg = self.cfg
14441     tgts = [("/cluster", cfg.GetClusterInfo())]
14442     ilist = cfg.GetAllInstancesInfo().values()
14443     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14444     nlist = cfg.GetAllNodesInfo().values()
14445     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14446     tgts.extend(("/nodegroup/%s" % n.name, n)
14447                 for n in cfg.GetAllNodeGroupsInfo().values())
14448     results = []
14449     for path, target in tgts:
14450       for tag in target.GetTags():
14451         if self.re.search(tag):
14452           results.append((path, tag))
14453     return results
14454
14455
14456 class LUTagsSet(TagsLU):
14457   """Sets a tag on a given object.
14458
14459   """
14460   REQ_BGL = False
14461
14462   def CheckPrereq(self):
14463     """Check prerequisites.
14464
14465     This checks the type and length of the tag name and value.
14466
14467     """
14468     TagsLU.CheckPrereq(self)
14469     for tag in self.op.tags:
14470       objects.TaggableObject.ValidateTag(tag)
14471
14472   def Exec(self, feedback_fn):
14473     """Sets the tag.
14474
14475     """
14476     try:
14477       for tag in self.op.tags:
14478         self.target.AddTag(tag)
14479     except errors.TagError, err:
14480       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14481     self.cfg.Update(self.target, feedback_fn)
14482
14483
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag must be valid and must currently be set on the object.

    @raise errors.OpPrereqError: if any of the tags is not set on the object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    missing = requested - present
    if not missing:
      return

    quoted = ["'%s'" % tag for tag in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for old_tag in self.op.tags:
      self.target.RemoveTag(old_tag)

    self.cfg.Update(self.target, feedback_fn)
14516
14517
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep takes place on the master and/or on a list of nodes, depending
  on the opcode parameters.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      rpc_results = self.rpc.call_test_delay(self.op.on_nodes,
                                             self.op.duration)
      for (node_name, nres) in rpc_results.items():
        nres.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    repeat = self.op.repeat

    if repeat == 0:
      # No repetition requested: sleep exactly once, without logging
      self._TestDelay()
      return

    last_idx = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
      self._TestDelay()
14564
14565
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  At selected points of its life cycle the LU notifies a client through a
  temporary Unix socket and waits for the client's confirmation.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  # Both values are passed to socket.settimeout, i.e. they are in seconds
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    After the client has connected, the listening socket and its temporary
    directory are removed and the method waits for the client to send data
    or close the connection.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is closed whether or not a client connected
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is emitted as a log entry of type
    constants.ELOG_JQUEUE_TEST.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Failures are reported with the exception class matching the phase
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the invocations of this method and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, optionally notify the client and set locks.

    @raise errors.ProgrammerError: if CheckArguments was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Send the requested notifications and log messages.

    @raise errors.ProgrammerError: if ExpandNames was not called before
    @raise errors.OpExecError: if the opcode requests a failure
    @return: True, unless a failure was requested

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Tell the client how many messages to expect before sending them
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
14700
14701
14702 class IAllocator(object):
14703   """IAllocator framework.
14704
  An IAllocator instance has four sets of attributes:
14706     - cfg that is needed to query the cluster
14707     - input data (all members of the _KEYS class attribute are required)
14708     - four buffer attributes (in|out_data|text), that represent the
14709       input (to the external script) in text and data structure format,
14710       and the output from it, again in two formats
14711     - the result variables from the script (success, info, nodes) for
14712       easy usage
14713
14714   """
14715   # pylint: disable=R0902
14716   # lots of instance attributes
14717
14718   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14719     self.cfg = cfg
14720     self.rpc = rpc_runner
14721     # init buffer variables
14722     self.in_text = self.out_text = self.in_data = self.out_data = None
14723     # init all input fields so that pylint is happy
14724     self.mode = mode
14725     self.memory = self.disks = self.disk_template = self.spindle_use = None
14726     self.os = self.tags = self.nics = self.vcpus = None
14727     self.hypervisor = None
14728     self.relocate_from = None
14729     self.name = None
14730     self.instances = None
14731     self.evac_mode = None
14732     self.target_groups = []
14733     # computed fields
14734     self.required_nodes = None
14735     # init result fields
14736     self.success = self.info = self.result = None
14737
14738     try:
14739       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14740     except KeyError:
14741       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14742                                    " IAllocator" % self.mode)
14743
14744     keyset = [n for (n, _) in keydata]
14745
14746     for key in kwargs:
14747       if key not in keyset:
14748         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14749                                      " IAllocator" % key)
14750       setattr(self, key, kwargs[key])
14751
14752     for key in keyset:
14753       if key not in kwargs:
14754         raise errors.ProgrammerError("Missing input parameter '%s' to"
14755                                      " IAllocator" % key)
14756     self._BuildInputData(compat.partial(fn, self), keydata)
14757
14758   def _ComputeClusterData(self):
14759     """Compute the generic allocator input data.
14760
14761     This is the data that is independent of the actual operation.
14762
14763     """
14764     cfg = self.cfg
14765     cluster_info = cfg.GetClusterInfo()
14766     # cluster data
14767     data = {
14768       "version": constants.IALLOCATOR_VERSION,
14769       "cluster_name": cfg.GetClusterName(),
14770       "cluster_tags": list(cluster_info.GetTags()),
14771       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14772       "ipolicy": cluster_info.ipolicy,
14773       }
14774     ninfo = cfg.GetAllNodesInfo()
14775     iinfo = cfg.GetAllInstancesInfo().values()
14776     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14777
14778     # node data
14779     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14780
14781     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14782       hypervisor_name = self.hypervisor
14783     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14784       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14785     else:
14786       hypervisor_name = cluster_info.primary_hypervisor
14787
14788     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14789                                         [hypervisor_name])
14790     node_iinfo = \
14791       self.rpc.call_all_instances_info(node_list,
14792                                        cluster_info.enabled_hypervisors)
14793
14794     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14795
14796     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14797     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14798                                                  i_list, config_ndata)
14799     assert len(data["nodes"]) == len(ninfo), \
14800         "Incomplete node data computed"
14801
14802     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14803
14804     self.in_data = data
14805
14806   @staticmethod
14807   def _ComputeNodeGroupData(cfg):
14808     """Compute node groups data.
14809
14810     """
14811     cluster = cfg.GetClusterInfo()
14812     ng = dict((guuid, {
14813       "name": gdata.name,
14814       "alloc_policy": gdata.alloc_policy,
14815       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14816       })
14817       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14818
14819     return ng
14820
14821   @staticmethod
14822   def _ComputeBasicNodeData(cfg, node_cfg):
14823     """Compute global node data.
14824
14825     @rtype: dict
14826     @returns: a dict of name: (node dict, node config)
14827
14828     """
14829     # fill in static (config-based) values
14830     node_results = dict((ninfo.name, {
14831       "tags": list(ninfo.GetTags()),
14832       "primary_ip": ninfo.primary_ip,
14833       "secondary_ip": ninfo.secondary_ip,
14834       "offline": ninfo.offline,
14835       "drained": ninfo.drained,
14836       "master_candidate": ninfo.master_candidate,
14837       "group": ninfo.group,
14838       "master_capable": ninfo.master_capable,
14839       "vm_capable": ninfo.vm_capable,
14840       "ndparams": cfg.GetNdParams(ninfo),
14841       })
14842       for ninfo in node_cfg.values())
14843
14844     return node_results
14845
14846   @staticmethod
14847   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14848                               node_results):
14849     """Compute global node data.
14850
14851     @param node_results: the basic node structures as filled from the config
14852
14853     """
14854     #TODO(dynmem): compute the right data on MAX and MIN memory
14855     # make a copy of the current dict
14856     node_results = dict(node_results)
14857     for nname, nresult in node_data.items():
14858       assert nname in node_results, "Missing basic data for node %s" % nname
14859       ninfo = node_cfg[nname]
14860
14861       if not (ninfo.offline or ninfo.drained):
14862         nresult.Raise("Can't get data for node %s" % nname)
14863         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14864                                 nname)
14865         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14866
14867         for attr in ["memory_total", "memory_free", "memory_dom0",
14868                      "vg_size", "vg_free", "cpu_total"]:
14869           if attr not in remote_info:
14870             raise errors.OpExecError("Node '%s' didn't return attribute"
14871                                      " '%s'" % (nname, attr))
14872           if not isinstance(remote_info[attr], int):
14873             raise errors.OpExecError("Node '%s' returned invalid value"
14874                                      " for '%s': %s" %
14875                                      (nname, attr, remote_info[attr]))
14876         # compute memory used by primary instances
14877         i_p_mem = i_p_up_mem = 0
14878         for iinfo, beinfo in i_list:
14879           if iinfo.primary_node == nname:
14880             i_p_mem += beinfo[constants.BE_MAXMEM]
14881             if iinfo.name not in node_iinfo[nname].payload:
14882               i_used_mem = 0
14883             else:
14884               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14885             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14886             remote_info["memory_free"] -= max(0, i_mem_diff)
14887
14888             if iinfo.admin_state == constants.ADMINST_UP:
14889               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14890
14891         # compute memory used by instances
14892         pnr_dyn = {
14893           "total_memory": remote_info["memory_total"],
14894           "reserved_memory": remote_info["memory_dom0"],
14895           "free_memory": remote_info["memory_free"],
14896           "total_disk": remote_info["vg_size"],
14897           "free_disk": remote_info["vg_free"],
14898           "total_cpus": remote_info["cpu_total"],
14899           "i_pri_memory": i_p_mem,
14900           "i_pri_up_memory": i_p_up_mem,
14901           }
14902         pnr_dyn.update(node_results[nname])
14903         node_results[nname] = pnr_dyn
14904
14905     return node_results
14906
14907   @staticmethod
14908   def _ComputeInstanceData(cluster_info, i_list):
14909     """Compute global instance data.
14910
14911     """
14912     instance_data = {}
14913     for iinfo, beinfo in i_list:
14914       nic_data = []
14915       for nic in iinfo.nics:
14916         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14917         nic_dict = {
14918           "mac": nic.mac,
14919           "ip": nic.ip,
14920           "mode": filled_params[constants.NIC_MODE],
14921           "link": filled_params[constants.NIC_LINK],
14922           }
14923         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14924           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14925         nic_data.append(nic_dict)
14926       pir = {
14927         "tags": list(iinfo.GetTags()),
14928         "admin_state": iinfo.admin_state,
14929         "vcpus": beinfo[constants.BE_VCPUS],
14930         "memory": beinfo[constants.BE_MAXMEM],
14931         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14932         "os": iinfo.os,
14933         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14934         "nics": nic_data,
14935         "disks": [{constants.IDISK_SIZE: dsk.size,
14936                    constants.IDISK_MODE: dsk.mode}
14937                   for dsk in iinfo.disks],
14938         "disk_template": iinfo.disk_template,
14939         "hypervisor": iinfo.hypervisor,
14940         }
14941       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14942                                                  pir["disks"])
14943       instance_data[iinfo.name] = pir
14944
14945     return instance_data
14946
14947   def _AddNewInstance(self):
14948     """Add new instance data to allocator structure.
14949
14950     This in combination with _AllocatorGetClusterData will create the
14951     correct structure needed as input for the allocator.
14952
14953     The checks for the completeness of the opcode must have already been
14954     done.
14955
14956     """
14957     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14958
14959     if self.disk_template in constants.DTS_INT_MIRROR:
14960       self.required_nodes = 2
14961     else:
14962       self.required_nodes = 1
14963
14964     request = {
14965       "name": self.name,
14966       "disk_template": self.disk_template,
14967       "tags": self.tags,
14968       "os": self.os,
14969       "vcpus": self.vcpus,
14970       "memory": self.memory,
14971       "spindle_use": self.spindle_use,
14972       "disks": self.disks,
14973       "disk_space_total": disk_space,
14974       "nics": self.nics,
14975       "required_nodes": self.required_nodes,
14976       "hypervisor": self.hypervisor,
14977       }
14978
14979     return request
14980
14981   def _AddRelocateInstance(self):
14982     """Add relocate instance data to allocator structure.
14983
14984     This in combination with _IAllocatorGetClusterData will create the
14985     correct structure needed as input for the allocator.
14986
14987     The checks for the completeness of the opcode must have already been
14988     done.
14989
14990     """
14991     instance = self.cfg.GetInstanceInfo(self.name)
14992     if instance is None:
14993       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14994                                    " IAllocator" % self.name)
14995
14996     if instance.disk_template not in constants.DTS_MIRRORED:
14997       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14998                                  errors.ECODE_INVAL)
14999
15000     if instance.disk_template in constants.DTS_INT_MIRROR and \
15001         len(instance.secondary_nodes) != 1:
15002       raise errors.OpPrereqError("Instance has not exactly one secondary node",
15003                                  errors.ECODE_STATE)
15004
15005     self.required_nodes = 1
15006     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15007     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15008
15009     request = {
15010       "name": self.name,
15011       "disk_space_total": disk_space,
15012       "required_nodes": self.required_nodes,
15013       "relocate_from": self.relocate_from,
15014       }
15015     return request
15016
15017   def _AddNodeEvacuate(self):
15018     """Get data for node-evacuate requests.
15019
15020     """
15021     return {
15022       "instances": self.instances,
15023       "evac_mode": self.evac_mode,
15024       }
15025
15026   def _AddChangeGroup(self):
15027     """Get data for node-evacuate requests.
15028
15029     """
15030     return {
15031       "instances": self.instances,
15032       "target_groups": self.target_groups,
15033       }
15034
15035   def _BuildInputData(self, fn, keydata):
15036     """Build input data structures.
15037
15038     """
15039     self._ComputeClusterData()
15040
15041     request = fn()
15042     request["type"] = self.mode
15043     for keyname, keytype in keydata:
15044       if keyname not in request:
15045         raise errors.ProgrammerError("Request parameter %s is missing" %
15046                                      keyname)
15047       val = request[keyname]
15048       if not keytype(val):
15049         raise errors.ProgrammerError("Request parameter %s doesn't pass"
15050                                      " validation, value %s, expected"
15051                                      " type %s" % (keyname, val, keytype))
15052     self.in_data["request"] = request
15053
15054     self.in_text = serializer.Dump(self.in_data)
15055
  # Validation helpers built from the "ht" combinators; they are used in
  # _MODE_DATA below, both for request fields and for allocator results.

  # Check for a list whose elements pass ht.TString
  _STRING_LIST = ht.TListOf(ht.TString)
  # Check for a list of lists of dicts whose "OP_ID" entry is one of the
  # failover, migrate or replace-disks opcode IDs
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # Check for a list of 3-element items: two non-empty strings followed by
  # a list of non-empty strings
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # Check for a list of 2-element items: a non-empty string followed by a
  # value passing ht.TMaybeString
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # Result check shared by the node-evacuate and change-group modes: a
  # 3-element structure of (moved, failed, jobs)
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data, unpacked in __init__ as (request function, key data,
  # result check):
  #   - the request function is one of the _Add* methods above (referenced
  #     here as a plain function, __init__ binds it to the instance)
  #   - the key data is a list of (name, validation function) pairs; every
  #     name is a mandatory keyword argument of __init__ and must be present
  #     in the request, where its value is checked by _BuildInputData
  #   - the result check is applied to the "result" value returned by the
  #     allocator in _ValidateResult
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
15109
15110   def Run(self, name, validate=True, call_fn=None):
15111     """Run an instance allocator and return the results.
15112
15113     """
15114     if call_fn is None:
15115       call_fn = self.rpc.call_iallocator_runner
15116
15117     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15118     result.Raise("Failure while running the iallocator script")
15119
15120     self.out_text = result.payload
15121     if validate:
15122       self._ValidateResult()
15123
15124   def _ValidateResult(self):
15125     """Process the allocator results.
15126
15127     This will process and if successful save the result in
15128     self.out_data and the other parameters.
15129
15130     """
15131     try:
15132       rdict = serializer.Load(self.out_text)
15133     except Exception, err:
15134       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15135
15136     if not isinstance(rdict, dict):
15137       raise errors.OpExecError("Can't parse iallocator results: not a dict")
15138
15139     # TODO: remove backwards compatiblity in later versions
15140     if "nodes" in rdict and "result" not in rdict:
15141       rdict["result"] = rdict["nodes"]
15142       del rdict["nodes"]
15143
15144     for key in "success", "info", "result":
15145       if key not in rdict:
15146         raise errors.OpExecError("Can't parse iallocator results:"
15147                                  " missing key '%s'" % key)
15148       setattr(self, key, rdict[key])
15149
15150     if not self._result_check(self.result):
15151       raise errors.OpExecError("Iallocator returned invalid result,"
15152                                " expected %s, got %s" %
15153                                (self._result_check, self.result),
15154                                errors.ECODE_INVAL)
15155
15156     if self.mode == constants.IALLOCATOR_MODE_RELOC:
15157       assert self.relocate_from is not None
15158       assert self.required_nodes == 1
15159
15160       node2group = dict((name, ndata["group"])
15161                         for (name, ndata) in self.in_data["nodes"].items())
15162
15163       fn = compat.partial(self._NodesToGroups, node2group,
15164                           self.in_data["nodegroups"])
15165
15166       instance = self.cfg.GetInstanceInfo(self.name)
15167       request_groups = fn(self.relocate_from + [instance.primary_node])
15168       result_groups = fn(rdict["result"] + [instance.primary_node])
15169
15170       if self.success and not set(result_groups).issubset(request_groups):
15171         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15172                                  " differ from original groups (%s)" %
15173                                  (utils.CommaJoin(result_groups),
15174                                   utils.CommaJoin(request_groups)))
15175
15176     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15177       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15178
15179     self.out_data = rdict
15180
15181   @staticmethod
15182   def _NodesToGroups(node2group, groups, nodes):
15183     """Returns a list of unique group names for a list of nodes.
15184
15185     @type node2group: dict
15186     @param node2group: Map from node name to group UUID
15187     @type groups: dict
15188     @param groups: Group information
15189     @type nodes: list
15190     @param nodes: Node names
15191
15192     """
15193     result = set()
15194
15195     for node in nodes:
15196       try:
15197         group_uuid = node2group[node]
15198       except KeyError:
15199         # Ignore unknown node
15200         pass
15201       else:
15202         try:
15203           group = groups[group_uuid]
15204         except KeyError:
15205           # Can't find group, let's use UUID
15206           group_name = group_uuid
15207         else:
15208           group_name = group["name"]
15209
15210         result.add(group_name)
15211
15212     return sorted(result)
15213
15214
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if the parameters are missing or invalid
      for the requested mode, or if the direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test only makes sense for a not yet existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # relocation is tested away from the current secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # running the allocator requires knowing which one to run
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function (not used)
    @return: the allocator input text for the "in" direction, otherwise
      the unvalidated output text of the allocator run
    @raise errors.ProgrammerError: if the mode was not rejected by
      L{CheckPrereq} but is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # "spindle_use" is a mandatory IAllocator input in allocation mode;
      # leaving it out makes the IAllocator constructor fail with a
      # ProgrammerError.
      # NOTE(review): the fallback of 1 is only used when the opcode doesn't
      # define "spindle_use" - confirm the desired default
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       spindle_use=getattr(self.op, "spindle_use", 1),
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # The mode has to be formatted into the message; passing it as a
      # second exception argument would leave the "%s" unexpanded
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # validation is skipped, the raw output is what gets returned
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15318
15319
#: Query type implementations
#: (maps each query resource constant to the object implementing it; the
#: table is looked up by L{_GetQueryImplementation})
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Import-time sanity check: the table must cover exactly the resources
# listed in constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15331
15332
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if the query type is not known

  """
  if name in _QUERY_IMPL:
    return _QUERY_IMPL[name]

  raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                             errors.ECODE_INVAL)