4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
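# Illustrative sketch (not part of the original module): an LU's Exec can hand
# follow-up work back to the master daemon by returning a ResultWithJobs; the
# opcode used below is only an example choice, any list of lists of opcodes
# would do, and extra keyword arguments become additional result values.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]   # one job, one opcode
#     return ResultWithJobs(jobs, warning="illustration only")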
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
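# A further, hypothetical example (not in the original docstring): acquire
# all node locks, but in shared mode, by also touching self.share_locks
self.needed_locks = {
locking.LEVEL_NODE: locking.ALL_SET,
}
self.share_locks[locking.LEVEL_NODE] = 1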
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If no nodes are to be returned, an
318 empty list must be used (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" pylint warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
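# Usage sketch (the LU is hypothetical): instance LUs typically call this
# helper from ExpandNames and then pick up the node locks later, in
# DeclareLocks, through _LockInstancesNodes (defined right below).
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()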
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primary or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklet.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
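# Minimal tasklet sketch (the class and its use are hypothetical, shown only
# to illustrate the contract described above): the owning LU remains
# responsible for locking, the tasklet just implements the two phases.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       # idempotent checks only, no cluster or system changes
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance.name)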
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
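# Sketch of how a concrete subclass plugs into this machinery (class name and
# details are hypothetical and heavily simplified): it provides FIELDS plus
# the three abstract methods, and callers drive it via NewStyleQuery or
# OldStyleQuery.
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS          # field definitions to query against
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names or locking.ALL_SET
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
#       # ... gather whatever data FIELDS needs for "names" and return it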
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is of a wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is of a wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
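# Behaviour sketch (parameter names and values invented for illustration):
# keys set to constants.VALUE_DEFAULT are dropped from the result, everything
# else overrides or extends the old values, and old_params itself is left
# untouched thanks to the deep copy above.
#
#   old = {"mem": 128, "vcpus": 1}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 2, "boot": True}
#   _GetUpdatedParams(old, upd)    # ==> {"vcpus": 2, "boot": True}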
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of an instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if not value or value == [constants.VALUE_DEFAULT]:
800 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
801 " on the cluster" % key,
804 if key in constants.IPOLICY_PARAMETERS:
805 # FIXME: we assume all such values are float
807 ipolicy[key] = float(value)
808 except (TypeError, ValueError), err:
809 raise errors.OpPrereqError("Invalid value for attribute"
810 " '%s': '%s', error: %s" %
811 (key, value, err), errors.ECODE_INVAL)
813 # FIXME: we assume all others are lists; this should be redone
815 ipolicy[key] = list(value)
817 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
818 except errors.ConfigurationError, err:
819 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
824 def _UpdateAndVerifySubDict(base, updates, type_check):
825 """Updates and verifies a dict with sub dicts of the same type.
827 @param base: The dict with the old data
828 @param updates: The dict with the new data
829 @param type_check: Dict suitable to ForceDictType to verify correct types
830 @return: A new dict with updated and verified values
834 new = _GetUpdatedParams(old, value)
835 utils.ForceDictType(new, type_check)
838 ret = copy.deepcopy(base)
839 ret.update(dict((key, fn(base.get(key, {}), value))
840 for key, value in updates.items()))
844 def _MergeAndVerifyHvState(op_input, obj_input):
845 """Combines the hv state from an opcode with that of the object
847 @param op_input: The input dict from the opcode
848 @param obj_input: The input dict from the objects
849 @return: The verified and updated dict
853 invalid_hvs = set(op_input) - constants.HYPER_TYPES
855 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
856 " %s" % utils.CommaJoin(invalid_hvs),
858 if obj_input is None:
860 type_check = constants.HVSTS_PARAMETER_TYPES
861 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
866 def _MergeAndVerifyDiskState(op_input, obj_input):
867 """Combines the disk state from an opcode with that of the object
869 @param op_input: The input dict from the opcode
870 @param obj_input: The input dict from the objects
871 @return: The verified and updated dict
874 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
876 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
877 utils.CommaJoin(invalid_dst),
879 type_check = constants.DSS_PARAMETER_TYPES
880 if obj_input is None:
882 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
884 for key, value in op_input.items())
889 def _ReleaseLocks(lu, level, names=None, keep=None):
890 """Releases locks owned by an LU.
892 @type lu: L{LogicalUnit}
893 @param level: Lock level
894 @type names: list or None
895 @param names: Names of locks to release
896 @type keep: list or None
897 @param keep: Names of locks to retain
900 assert not (keep is not None and names is not None), \
901 "Only one of the 'names' and the 'keep' parameters can be given"
903 if names is not None:
904 should_release = names.__contains__
906 should_release = lambda name: name not in keep
908 should_release = None
910 owned = lu.owned_locks(level)
912 # Not owning any lock at this level, do nothing
919 # Determine which locks to release
921 if should_release(name):
926 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
928 # Release just some locks
929 lu.glm.release(level, names=release)
931 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
934 lu.glm.release(level)
936 assert not lu.glm.is_owned(level), "No locks should be owned"
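# Usage sketch (the node names are placeholders): an LU that no longer needs
# most of its node locks can either keep an explicit subset or drop the whole
# level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=["node1.example.com"])
#   _ReleaseLocks(self, locking.LEVEL_NODE)     # release all node locks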
939 def _MapInstanceDisksToNodes(instances):
940 """Creates a map from (node, volume) to instance name.
942 @type instances: list of L{objects.Instance}
943 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
946 return dict(((node, vol), inst.name)
947 for inst in instances
948 for (node, vols) in inst.MapLVsByNode().items()
952 def _RunPostHook(lu, node_name):
953 """Runs the post-hook for an opcode on a single node.
956 hm = lu.proc.BuildHooksManager(lu)
958 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 # pylint: disable=W0702
961 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
964 def _CheckOutputFields(static, dynamic, selected):
965 """Checks whether all selected fields are valid.
967 @type static: L{utils.FieldSet}
968 @param static: static fields set
969 @type dynamic: L{utils.FieldSet}
970 @param dynamic: dynamic fields set
977 delta = f.NonMatching(selected)
979 raise errors.OpPrereqError("Unknown output fields selected: %s"
980 % ",".join(delta), errors.ECODE_INVAL)
983 def _CheckGlobalHvParams(params):
984 """Validates that given hypervisor params are not global ones.
986 This will ensure that instances don't get customised versions of
990 used_globals = constants.HVC_GLOBALS.intersection(params)
992 msg = ("The following hypervisor parameters are global and cannot"
993 " be customized at instance level, please modify them at"
994 " cluster level: %s" % utils.CommaJoin(used_globals))
995 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
998 def _CheckNodeOnline(lu, node, msg=None):
999 """Ensure that a given node is online.
1001 @param lu: the LU on behalf of which we make the check
1002 @param node: the node to check
1003 @param msg: if passed, should be a message to replace the default one
1004 @raise errors.OpPrereqError: if the node is offline
1008 msg = "Can't use offline node"
1009 if lu.cfg.GetNodeInfo(node).offline:
1010 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1013 def _CheckNodeNotDrained(lu, node):
1014 """Ensure that a given node is not drained.
1016 @param lu: the LU on behalf of which we make the check
1017 @param node: the node to check
1018 @raise errors.OpPrereqError: if the node is drained
1021 if lu.cfg.GetNodeInfo(node).drained:
1022 raise errors.OpPrereqError("Can't use drained node %s" % node,
1026 def _CheckNodeVmCapable(lu, node):
1027 """Ensure that a given node is vm capable.
1029 @param lu: the LU on behalf of which we make the check
1030 @param node: the node to check
1031 @raise errors.OpPrereqError: if the node is not vm capable
1034 if not lu.cfg.GetNodeInfo(node).vm_capable:
1035 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040 """Ensure that a node supports a given OS.
1042 @param lu: the LU on behalf of which we make the check
1043 @param node: the node to check
1044 @param os_name: the OS to query about
1045 @param force_variant: whether to ignore variant errors
1046 @raise errors.OpPrereqError: if the node is not supporting the OS
1049 result = lu.rpc.call_os_get(node, os_name)
1050 result.Raise("OS '%s' not in supported OS list for node %s" %
1052 prereq=True, ecode=errors.ECODE_INVAL)
1053 if not force_variant:
1054 _CheckOSVariant(result.payload, os_name)
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058 """Ensure that a node has the given secondary ip.
1060 @type lu: L{LogicalUnit}
1061 @param lu: the LU on behalf of which we make the check
1063 @param node: the node to check
1064 @type secondary_ip: string
1065 @param secondary_ip: the ip to check
1066 @type prereq: boolean
1067 @param prereq: whether to throw a prerequisite or an execute error
1068 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1072 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073 result.Raise("Failure checking secondary ip on node %s" % node,
1074 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075 if not result.payload:
1076 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077 " please fix and re-run this command" % secondary_ip)
1079 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081 raise errors.OpExecError(msg)
1084 def _GetClusterDomainSecret():
1085 """Reads the cluster domain secret.
1088 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093 """Ensure that an instance is in one of the required states.
1095 @param lu: the LU on behalf of which we make the check
1096 @param instance: the instance to check
1097 @param msg: if passed, should be a message to replace the default one
1098 @raise errors.OpPrereqError: if the instance is not in the required state
1102 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103 if instance.admin_state not in req_states:
1104 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105 (instance.name, instance.admin_state, msg),
1108 if constants.ADMINST_UP not in req_states:
1109 pnode = instance.primary_node
1110 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1111 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1112 prereq=True, ecode=errors.ECODE_ENVIRON)
1114 if instance.name in ins_l.payload:
1115 raise errors.OpPrereqError("Instance %s is running, %s" %
1116 (instance.name, msg), errors.ECODE_STATE)
1119 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1120 """Computes if value is in the desired range.
1122 @param name: name of the parameter for which we perform the check
1123 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1125 @param ipolicy: dictionary containing min, max and std values
1126 @param value: actual value that we want to use
1127 @return: None if the value is within the desired range, an error message otherwise
1131 if value in [None, constants.VALUE_AUTO]:
1133 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1134 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1135 if value > max_v or min_v > value:
1137 fqn = "%s/%s" % (name, qualifier)
1140 return ("%s value %s is not in range [%s, %s]" %
1141 (fqn, value, min_v, max_v))
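# Behaviour sketch (the ipolicy values are invented): the helper returns None
# when the value fits the [min, max] interval and an error string otherwise;
# None and constants.VALUE_AUTO are always accepted.
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 512)
#     # ==> None (in range)
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "1", ipolicy, 8192)
#     # ==> an error string mentioning the allowed range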
1145 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1146 nic_count, disk_sizes, spindle_use,
1147 _compute_fn=_ComputeMinMaxSpec):
1148 """Verifies ipolicy against provided specs.
1151 @param ipolicy: The ipolicy
1153 @param mem_size: The memory size
1154 @type cpu_count: int
1155 @param cpu_count: Used cpu cores
1156 @type disk_count: int
1157 @param disk_count: Number of disks used
1158 @type nic_count: int
1159 @param nic_count: Number of nics used
1160 @type disk_sizes: list of ints
1161 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1162 @type spindle_use: int
1163 @param spindle_use: The number of spindles this instance uses
1164 @param _compute_fn: The compute function (unittest only)
1165 @return: A list of violations, or an empty list if no violations are found
1168 assert disk_count == len(disk_sizes)
1171 (constants.ISPEC_MEM_SIZE, "", mem_size),
1172 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1173 (constants.ISPEC_DISK_COUNT, "", disk_count),
1174 (constants.ISPEC_NIC_COUNT, "", nic_count),
1175 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1176 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1177 for idx, d in enumerate(disk_sizes)]
1180 (_compute_fn(name, qualifier, ipolicy, value)
1181 for (name, qualifier, value) in test_settings))
1184 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1185 _compute_fn=_ComputeIPolicySpecViolation):
1186 """Compute if instance meets the specs of ipolicy.
1189 @param ipolicy: The ipolicy to verify against
1190 @type instance: L{objects.Instance}
1191 @param instance: The instance to verify
1192 @param _compute_fn: The function to verify ipolicy (unittest only)
1193 @see: L{_ComputeIPolicySpecViolation}
1196 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1197 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1198 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1199 disk_count = len(instance.disks)
1200 disk_sizes = [disk.size for disk in instance.disks]
1201 nic_count = len(instance.nics)
1203 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1204 disk_sizes, spindle_use)
1207 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1208 _compute_fn=_ComputeIPolicySpecViolation):
1209 """Compute if instance specs meet the specs of ipolicy.
1212 @param ipolicy: The ipolicy to verify against
1213 @type instance_spec: dict
1214 @param instance_spec: The instance spec to verify
1215 @param _compute_fn: The function to verify ipolicy (unittest only)
1216 @see: L{_ComputeIPolicySpecViolation}
1219 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1220 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1221 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1222 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1223 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1224 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1226 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1227 disk_sizes, spindle_use)
1230 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1232 _compute_fn=_ComputeIPolicyInstanceViolation):
1233 """Compute if instance meets the specs of the new target group.
1235 @param ipolicy: The ipolicy to verify
1236 @param instance: The instance object to verify
1237 @param current_group: The current group of the instance
1238 @param target_group: The new group of the instance
1239 @param _compute_fn: The function to verify ipolicy (unittest only)
1240 @see: L{_ComputeIPolicySpecViolation}
1243 if current_group == target_group:
1246 return _compute_fn(ipolicy, instance)
1249 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1250 _compute_fn=_ComputeIPolicyNodeViolation):
1251 """Checks that the target node is correct in terms of instance policy.
1253 @param ipolicy: The ipolicy to verify
1254 @param instance: The instance object to verify
1255 @param node: The new node to relocate the instance to
1256 @param ignore: Ignore violations of the ipolicy
1257 @param _compute_fn: The function to verify ipolicy (unittest only)
1258 @see: L{_ComputeIPolicySpecViolation}
1261 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1262 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1265 msg = ("Instance does not meet target node group's (%s) instance"
1266 " policy: %s") % (node.group, utils.CommaJoin(res))
1270 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1273 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1274 """Computes a set of any instances that would violate the new ipolicy.
1276 @param old_ipolicy: The current (still in-place) ipolicy
1277 @param new_ipolicy: The new (to become) ipolicy
1278 @param instances: List of instances to verify
1279 @return: A list of instances which violate the new ipolicy but
1283 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1284 _ComputeViolatingInstances(old_ipolicy, instances))
1287 def _ExpandItemName(fn, name, kind):
1288 """Expand an item name.
1290 @param fn: the function to use for expansion
1291 @param name: requested item name
1292 @param kind: text description ('Node' or 'Instance')
1293 @return: the resolved (full) name
1294 @raise errors.OpPrereqError: if the item is not found
1297 full_name = fn(name)
1298 if full_name is None:
1299 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1304 def _ExpandNodeName(cfg, name):
1305 """Wrapper over L{_ExpandItemName} for nodes."""
1306 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1309 def _ExpandInstanceName(cfg, name):
1310 """Wrapper over L{_ExpandItemName} for instance."""
1311 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1314 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1315 minmem, maxmem, vcpus, nics, disk_template, disks,
1316 bep, hvp, hypervisor_name, tags):
1317 """Builds instance related env variables for hooks
1319 This builds the hook environment from individual variables.
1322 @param name: the name of the instance
1323 @type primary_node: string
1324 @param primary_node: the name of the instance's primary node
1325 @type secondary_nodes: list
1326 @param secondary_nodes: list of secondary nodes as strings
1327 @type os_type: string
1328 @param os_type: the name of the instance's OS
1329 @type status: string
1330 @param status: the desired status of the instance
1331 @type minmem: string
1332 @param minmem: the minimum memory size of the instance
1333 @type maxmem: string
1334 @param maxmem: the maximum memory size of the instance
1336 @param vcpus: the count of VCPUs the instance has
1338 @param nics: list of tuples (ip, mac, mode, link) representing
1339 the NICs the instance has
1340 @type disk_template: string
1341 @param disk_template: the disk template of the instance
1343 @param disks: the list of (size, mode) pairs
1345 @param bep: the backend parameters for the instance
1347 @param hvp: the hypervisor parameters for the instance
1348 @type hypervisor_name: string
1349 @param hypervisor_name: the hypervisor for the instance
1351 @param tags: list of instance tags as strings
1353 @return: the hook environment for this instance
1358 "INSTANCE_NAME": name,
1359 "INSTANCE_PRIMARY": primary_node,
1360 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1361 "INSTANCE_OS_TYPE": os_type,
1362 "INSTANCE_STATUS": status,
1363 "INSTANCE_MINMEM": minmem,
1364 "INSTANCE_MAXMEM": maxmem,
1365 # TODO(2.7) remove deprecated "memory" value
1366 "INSTANCE_MEMORY": maxmem,
1367 "INSTANCE_VCPUS": vcpus,
1368 "INSTANCE_DISK_TEMPLATE": disk_template,
1369 "INSTANCE_HYPERVISOR": hypervisor_name,
1372 nic_count = len(nics)
1373 for idx, (ip, mac, mode, link) in enumerate(nics):
1376 env["INSTANCE_NIC%d_IP" % idx] = ip
1377 env["INSTANCE_NIC%d_MAC" % idx] = mac
1378 env["INSTANCE_NIC%d_MODE" % idx] = mode
1379 env["INSTANCE_NIC%d_LINK" % idx] = link
1380 if mode == constants.NIC_MODE_BRIDGED:
1381 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1385 env["INSTANCE_NIC_COUNT"] = nic_count
1388 disk_count = len(disks)
1389 for idx, (size, mode) in enumerate(disks):
1390 env["INSTANCE_DISK%d_SIZE" % idx] = size
1391 env["INSTANCE_DISK%d_MODE" % idx] = mode
1395 env["INSTANCE_DISK_COUNT"] = disk_count
1400 env["INSTANCE_TAGS"] = " ".join(tags)
1402 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1403 for key, value in source.items():
1404 env["INSTANCE_%s_%s" % (kind, key)] = value
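# Illustration of the resulting environment (values invented): for an
# instance with one bridged NIC and one disk the dictionary ends up with keys
# along the lines of
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_OS_TYPE, INSTANCE_NIC_COUNT,
#   INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#   INSTANCE_BE_<param> and INSTANCE_HV_<param>
# and the hooks runner later prefixes every key with "GANETI_".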
1409 def _NICListToTuple(lu, nics):
1410 """Build a list of nic information tuples.
1412 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1413 value in LUInstanceQueryData.
1415 @type lu: L{LogicalUnit}
1416 @param lu: the logical unit on whose behalf we execute
1417 @type nics: list of L{objects.NIC}
1418 @param nics: list of nics to convert to hooks tuples
1422 cluster = lu.cfg.GetClusterInfo()
1426 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1427 mode = filled_params[constants.NIC_MODE]
1428 link = filled_params[constants.NIC_LINK]
1429 hooks_nics.append((ip, mac, mode, link))
1433 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1434 """Builds instance related env variables for hooks from an object.
1436 @type lu: L{LogicalUnit}
1437 @param lu: the logical unit on whose behalf we execute
1438 @type instance: L{objects.Instance}
1439 @param instance: the instance for which we should build the
1441 @type override: dict
1442 @param override: dictionary with key/values that will override
1445 @return: the hook environment dictionary
1448 cluster = lu.cfg.GetClusterInfo()
1449 bep = cluster.FillBE(instance)
1450 hvp = cluster.FillHV(instance)
1452 "name": instance.name,
1453 "primary_node": instance.primary_node,
1454 "secondary_nodes": instance.secondary_nodes,
1455 "os_type": instance.os,
1456 "status": instance.admin_state,
1457 "maxmem": bep[constants.BE_MAXMEM],
1458 "minmem": bep[constants.BE_MINMEM],
1459 "vcpus": bep[constants.BE_VCPUS],
1460 "nics": _NICListToTuple(lu, instance.nics),
1461 "disk_template": instance.disk_template,
1462 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1465 "hypervisor_name": instance.hypervisor,
1466 "tags": instance.tags,
1469 args.update(override)
1470 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1473 def _AdjustCandidatePool(lu, exceptions):
1474 """Adjust the candidate pool after node operations.
1477 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1479 lu.LogInfo("Promoted nodes to master candidate role: %s",
1480 utils.CommaJoin(node.name for node in mod_list))
1481 for name in mod_list:
1482 lu.context.ReaddNode(name)
1483 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1485 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1489 def _DecideSelfPromotion(lu, exceptions=None):
1490 """Decide whether I should promote myself as a master candidate.
1493 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1494 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1495 # the new node will increase mc_max with one, so:
1496 mc_should = min(mc_should + 1, cp_size)
1497 return mc_now < mc_should
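# Worked example (numbers invented): with candidate_pool_size = 10, mc_now = 3
# and mc_should = 5 as returned by GetMasterCandidateStats, the new node bumps
# mc_should to min(5 + 1, 10) = 6; since mc_now (3) < mc_should (6), the node
# promotes itself to master candidate.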
1500 def _CalculateGroupIPolicy(cluster, group):
1501 """Calculate instance policy for group.
1504 return cluster.SimpleFillIPolicy(group.ipolicy)
1507 def _ComputeViolatingInstances(ipolicy, instances):
1508 """Computes the set of instances that violate the given ipolicy.
1510 @param ipolicy: The ipolicy to verify
1511 @type instances: list of L{objects.Instance}
1512 @param instances: List of instances to verify
1513 @return: A frozenset of instance names violating the ipolicy
1516 return frozenset([inst.name for inst in instances
1517 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1520 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1521 """Check that the bridges needed by a list of nics exist.
1524 cluster = lu.cfg.GetClusterInfo()
1525 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1526 brlist = [params[constants.NIC_LINK] for params in paramslist
1527 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529 result = lu.rpc.call_bridges_exist(target_node, brlist)
1530 result.Raise("Error checking bridges on destination node '%s'" %
1531 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1534 def _CheckInstanceBridgesExist(lu, instance, node=None):
1535 """Check that the bridges needed by an instance exist.
1539 node = instance.primary_node
1540 _CheckNicsBridgesExist(lu, instance.nics, node)
1543 def _CheckOSVariant(os_obj, name):
1544 """Check whether an OS name conforms to the os variants specification.
1546 @type os_obj: L{objects.OS}
1547 @param os_obj: OS object to check
1549 @param name: OS name passed by the user, to check for validity
1552 variant = objects.OS.GetVariant(name)
1553 if not os_obj.supported_variants:
1555 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1556 " passed)" % (os_obj.name, variant),
1560 raise errors.OpPrereqError("OS name must include a variant",
1563 if variant not in os_obj.supported_variants:
1564 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1567 def _GetNodeInstancesInner(cfg, fn):
1568 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1571 def _GetNodeInstances(cfg, node_name):
1572 """Returns a list of all primary and secondary instances on a node.
1576 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1579 def _GetNodePrimaryInstances(cfg, node_name):
1580 """Returns primary instances on a node.
1583 return _GetNodeInstancesInner(cfg,
1584 lambda inst: node_name == inst.primary_node)
1587 def _GetNodeSecondaryInstances(cfg, node_name):
1588 """Returns secondary instances on a node.
1591 return _GetNodeInstancesInner(cfg,
1592 lambda inst: node_name in inst.secondary_nodes)
1595 def _GetStorageTypeArgs(cfg, storage_type):
1596 """Returns the arguments for a storage type.
1599 # Special case for file storage
1600 if storage_type == constants.ST_FILE:
1601 # storage.FileStorage wants a list of storage directories
1602 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1607 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1610 for dev in instance.disks:
1611 cfg.SetDiskID(dev, node_name)
1613 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1614 result.Raise("Failed to get disk status from node %s" % node_name,
1615 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1617 for idx, bdev_status in enumerate(result.payload):
1618 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1624 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1625 """Check the sanity of iallocator and node arguments and use the
1626 cluster-wide iallocator if appropriate.
1628 Check that at most one of (iallocator, node) is specified. If none is
1629 specified, then the LU's opcode's iallocator slot is filled with the
1630 cluster-wide default iallocator.
1632 @type iallocator_slot: string
1633 @param iallocator_slot: the name of the opcode iallocator slot
1634 @type node_slot: string
1635 @param node_slot: the name of the opcode target node slot
1638 node = getattr(lu.op, node_slot, None)
1639 iallocator = getattr(lu.op, iallocator_slot, None)
1641 if node is not None and iallocator is not None:
1642 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1644 elif node is None and iallocator is None:
1645 default_iallocator = lu.cfg.GetDefaultIAllocator()
1646 if default_iallocator:
1647 setattr(lu.op, iallocator_slot, default_iallocator)
1649 raise errors.OpPrereqError("No iallocator or node given and no"
1650 " cluster-wide default iallocator found;"
1651 " please specify either an iallocator or a"
1652 " node, or set a cluster-wide default"
1656 def _GetDefaultIAllocator(cfg, iallocator):
1657 """Decides on which iallocator to use.
1659 @type cfg: L{config.ConfigWriter}
1660 @param cfg: Cluster configuration object
1661 @type iallocator: string or None
1662 @param iallocator: Iallocator specified in opcode
1664 @return: Iallocator name
1668 # Use default iallocator
1669 iallocator = cfg.GetDefaultIAllocator()
1672 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1673 " opcode nor as a cluster-wide default",
1679 class LUClusterPostInit(LogicalUnit):
1680 """Logical unit for running hooks after cluster initialization.
1683 HPATH = "cluster-init"
1684 HTYPE = constants.HTYPE_CLUSTER
1686 def BuildHooksEnv(self):
1691 "OP_TARGET": self.cfg.GetClusterName(),
1694 def BuildHooksNodes(self):
1695 """Build hooks nodes.
1698 return ([], [self.cfg.GetMasterNode()])
1700 def Exec(self, feedback_fn):
1707 class LUClusterDestroy(LogicalUnit):
1708 """Logical unit for destroying the cluster.
1711 HPATH = "cluster-destroy"
1712 HTYPE = constants.HTYPE_CLUSTER
1714 def BuildHooksEnv(self):
1719 "OP_TARGET": self.cfg.GetClusterName(),
1722 def BuildHooksNodes(self):
1723 """Build hooks nodes.
1728 def CheckPrereq(self):
1729 """Check prerequisites.
1731 This checks whether the cluster is empty.
1733 Any errors are signaled by raising errors.OpPrereqError.
1736 master = self.cfg.GetMasterNode()
1738 nodelist = self.cfg.GetNodeList()
1739 if len(nodelist) != 1 or nodelist[0] != master:
1740 raise errors.OpPrereqError("There are still %d node(s) in"
1741 " this cluster." % (len(nodelist) - 1),
1743 instancelist = self.cfg.GetInstanceList()
1745 raise errors.OpPrereqError("There are still %d instance(s) in"
1746 " this cluster." % len(instancelist),
1749 def Exec(self, feedback_fn):
1750 """Destroys the cluster.
1753 master_params = self.cfg.GetMasterNetworkParameters()
1755 # Run post hooks on master node before it's removed
1756 _RunPostHook(self, master_params.name)
1758 ems = self.cfg.GetUseExternalMipScript()
1759 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1762 self.LogWarning("Error disabling the master IP address: %s",
1765 return master_params.name
1768 def _VerifyCertificate(filename):
1769 """Verifies a certificate for L{LUClusterVerifyConfig}.
1771 @type filename: string
1772 @param filename: Path to PEM file
1776 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1777 utils.ReadFile(filename))
1778 except Exception, err: # pylint: disable=W0703
1779 return (LUClusterVerifyConfig.ETYPE_ERROR,
1780 "Failed to load X509 certificate %s: %s" % (filename, err))
1783 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1784 constants.SSL_CERT_EXPIRATION_ERROR)
1787 fnamemsg = "While verifying %s: %s" % (filename, msg)
1792 return (None, fnamemsg)
1793 elif errcode == utils.CERT_WARNING:
1794 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1795 elif errcode == utils.CERT_ERROR:
1796 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1798 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1801 def _GetAllHypervisorParameters(cluster, instances):
1802 """Compute the set of all hypervisor parameters.
1804 @type cluster: L{objects.Cluster}
1805 @param cluster: the cluster object
1806 @type instances: list of L{objects.Instance}
1807 @param instances: additional instances from which to obtain parameters
1808 @rtype: list of (origin, hypervisor, parameters)
1809 @return: a list with all parameters found, indicating the hypervisor they
1810 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1815 for hv_name in cluster.enabled_hypervisors:
1816 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1818 for os_name, os_hvp in cluster.os_hvp.items():
1819 for hv_name, hv_params in os_hvp.items():
1821 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1822 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1824 # TODO: collapse identical parameter values into a single one
1825 for instance in instances:
1826 if instance.hvparams:
1827 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1828 cluster.FillHV(instance)))
1833 class _VerifyErrors(object):
1834 """Mix-in for cluster/group verify LUs.
1836 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1837 self.op and self._feedback_fn to be available.)
1841 ETYPE_FIELD = "code"
1842 ETYPE_ERROR = "ERROR"
1843 ETYPE_WARNING = "WARNING"
1845 def _Error(self, ecode, item, msg, *args, **kwargs):
1846 """Format an error message.
1848 Based on the opcode's error_codes parameter, either format a
1849 parseable error code, or a simpler error string.
1851 This must be called only from Exec and functions called from Exec.
1854 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1855 itype, etxt, _ = ecode
1856 # first complete the msg
1859 # then format the whole message
1860 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1861 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1867 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1868 # and finally report it via the feedback_fn
1869 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1871 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1872 """Log an error message if the passed condition is True.
1876 or self.op.debug_simulate_errors) # pylint: disable=E1101
1878 # If the error code is in the list of ignored errors, demote the error to a
1880 (_, etxt, _) = ecode
1881 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1882 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1885 self._Error(ecode, *args, **kwargs)
1887 # do not mark the operation as failed for WARN cases only
1888 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1889 self.bad = self.bad or cond
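# Usage sketch from within a verify LU (condition, error code and message are
# illustrative only): the mix-in formats and reports the problem, and only
# ERROR-level entries flip self.bad.
#
#   self._ErrorIf(node_image.rpc_fail, constants.CV_ENODERPC, node.name,
#                 "error while contacting node")
#
# If the error's code is listed in self.op.ignore_errors it is demoted to a
# WARNING and does not mark the operation as failed.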
1892 class LUClusterVerify(NoHooksLU):
1893 """Submits all jobs necessary to verify the cluster.
1898 def ExpandNames(self):
1899 self.needed_locks = {}
1901 def Exec(self, feedback_fn):
1904 if self.op.group_name:
1905 groups = [self.op.group_name]
1906 depends_fn = lambda: None
1908 groups = self.cfg.GetNodeGroupList()
1910 # Verify global configuration
1912 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1915 # Always depend on global verification
1916 depends_fn = lambda: [(-len(jobs), [])]
1918 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1919 ignore_errors=self.op.ignore_errors,
1920 depends=depends_fn())]
1921 for group in groups)
1923 # Fix up all parameters
1924 for op in itertools.chain(*jobs): # pylint: disable=W0142
1925 op.debug_simulate_errors = self.op.debug_simulate_errors
1926 op.verbose = self.op.verbose
1927 op.error_codes = self.op.error_codes
1929 op.skip_checks = self.op.skip_checks
1930 except AttributeError:
1931 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1933 return ResultWithJobs(jobs)
1936 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1937 """Verifies the cluster config.
1942 def _VerifyHVP(self, hvp_data):
1943 """Verifies locally the syntax of the hypervisor parameters.
1946 for item, hv_name, hv_params in hvp_data:
1947 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1950 hv_class = hypervisor.GetHypervisor(hv_name)
1951 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1952 hv_class.CheckParameterSyntax(hv_params)
1953 except errors.GenericError, err:
1954 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1956 def ExpandNames(self):
1957 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1958 self.share_locks = _ShareAll()
1960 def CheckPrereq(self):
1961 """Check prerequisites.
1964 # Retrieve all information
1965 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1966 self.all_node_info = self.cfg.GetAllNodesInfo()
1967 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1969 def Exec(self, feedback_fn):
1970     """Verify integrity of cluster, performing various tests on nodes.
1974 self._feedback_fn = feedback_fn
1976 feedback_fn("* Verifying cluster config")
1978 for msg in self.cfg.VerifyConfig():
1979 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1981 feedback_fn("* Verifying cluster certificate files")
1983 for cert_filename in constants.ALL_CERT_FILES:
1984 (errcode, msg) = _VerifyCertificate(cert_filename)
1985 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1987 feedback_fn("* Verifying hypervisor parameters")
1989 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1990 self.all_inst_info.values()))
1992 feedback_fn("* Verifying all nodes belong to an existing group")
1994 # We do this verification here because, should this bogus circumstance
1995 # occur, it would never be caught by VerifyGroup, which only acts on
1996 # nodes/instances reachable from existing node groups.
1998 dangling_nodes = set(node.name for node in self.all_node_info.values()
1999 if node.group not in self.all_group_info)
2001 dangling_instances = {}
2002 no_node_instances = []
2004 for inst in self.all_inst_info.values():
2005 if inst.primary_node in dangling_nodes:
2006 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2007 elif inst.primary_node not in self.all_node_info:
2008 no_node_instances.append(inst.name)
2013 utils.CommaJoin(dangling_instances.get(node.name,
2015 for node in dangling_nodes]
2017 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2019 "the following nodes (and their instances) belong to a non"
2020 " existing group: %s", utils.CommaJoin(pretty_dangling))
2022 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2024 "the following instances have a non-existing primary-node:"
2025 " %s", utils.CommaJoin(no_node_instances))
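  # Editor's note: an illustrative sketch (assumption, not original code).
  # With a hypothetical node "orphan1" whose group was deleted from the
  # configuration and an instance "inst1" primary on it, the checks above
  # report roughly:
  #
  #   CV_ECLUSTERDANGLINGNODES: nodes (and their instances) in a non-existing
  #                             group, e.g. "orphan1 (inst1)"
  #   CV_ECLUSTERDANGLINGINST:  instances whose primary node is not in the
  #                             configuration at all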
2030 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2031 """Verifies the status of a node group.
2034 HPATH = "cluster-verify"
2035 HTYPE = constants.HTYPE_CLUSTER
2038 _HOOKS_INDENT_RE = re.compile("^", re.M)
2040 class NodeImage(object):
2041 """A class representing the logical and physical status of a node.
2044 @ivar name: the node name to which this object refers
2045 @ivar volumes: a structure as returned from
2046 L{ganeti.backend.GetVolumeList} (runtime)
2047 @ivar instances: a list of running instances (runtime)
2048 @ivar pinst: list of configured primary instances (config)
2049 @ivar sinst: list of configured secondary instances (config)
2050 @ivar sbp: dictionary of {primary-node: list of instances} for all
2051 instances for which this node is secondary (config)
2052 @ivar mfree: free memory, as reported by hypervisor (runtime)
2053 @ivar dfree: free disk, as reported by the node (runtime)
2054 @ivar offline: the offline status (config)
2055 @type rpc_fail: boolean
2056   @ivar rpc_fail: whether the RPC verify call failed (overall,
2057 not whether the individual keys were correct) (runtime)
2058 @type lvm_fail: boolean
2059 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2060 @type hyp_fail: boolean
2061 @ivar hyp_fail: whether the RPC call didn't return the instance list
2062 @type ghost: boolean
2063 @ivar ghost: whether this is a known node or not (config)
2064 @type os_fail: boolean
2065 @ivar os_fail: whether the RPC call didn't return valid OS data
2067 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2068 @type vm_capable: boolean
2069 @ivar vm_capable: whether the node can host instances
2072 def __init__(self, offline=False, name=None, vm_capable=True):
2081 self.offline = offline
2082 self.vm_capable = vm_capable
2083 self.rpc_fail = False
2084 self.lvm_fail = False
2085 self.hyp_fail = False
2087 self.os_fail = False
2090 def ExpandNames(self):
2091 # This raises errors.OpPrereqError on its own:
2092 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2094 # Get instances in node group; this is unsafe and needs verification later
2096 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2098 self.needed_locks = {
2099 locking.LEVEL_INSTANCE: inst_names,
2100 locking.LEVEL_NODEGROUP: [self.group_uuid],
2101 locking.LEVEL_NODE: [],
2104 self.share_locks = _ShareAll()
2106 def DeclareLocks(self, level):
2107 if level == locking.LEVEL_NODE:
2108 # Get members of node group; this is unsafe and needs verification later
2109 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2111 all_inst_info = self.cfg.GetAllInstancesInfo()
2113 # In Exec(), we warn about mirrored instances that have primary and
2114 # secondary living in separate node groups. To fully verify that
2115 # volumes for these instances are healthy, we will need to do an
2116       # extra call to their secondaries. We ensure here those nodes will
      # be locked.
2118 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2119 # Important: access only the instances whose lock is owned
2120 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2121 nodes.update(all_inst_info[inst].secondary_nodes)
2123 self.needed_locks[locking.LEVEL_NODE] = nodes
2125 def CheckPrereq(self):
2126 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2127 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2129 group_nodes = set(self.group_info.members)
2131 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2134 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2136 unlocked_instances = \
2137 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2140 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2141 utils.CommaJoin(unlocked_nodes),
2144 if unlocked_instances:
2145 raise errors.OpPrereqError("Missing lock for instances: %s" %
2146 utils.CommaJoin(unlocked_instances),
2149 self.all_node_info = self.cfg.GetAllNodesInfo()
2150 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2152 self.my_node_names = utils.NiceSort(group_nodes)
2153 self.my_inst_names = utils.NiceSort(group_instances)
2155 self.my_node_info = dict((name, self.all_node_info[name])
2156 for name in self.my_node_names)
2158 self.my_inst_info = dict((name, self.all_inst_info[name])
2159 for name in self.my_inst_names)
2161 # We detect here the nodes that will need the extra RPC calls for verifying
2162 # split LV volumes; they should be locked.
2163 extra_lv_nodes = set()
2165 for inst in self.my_inst_info.values():
2166 if inst.disk_template in constants.DTS_INT_MIRROR:
2167 for nname in inst.all_nodes:
2168 if self.all_node_info[nname].group != self.group_uuid:
2169 extra_lv_nodes.add(nname)
2171 unlocked_lv_nodes = \
2172 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2174 if unlocked_lv_nodes:
2175 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2176 utils.CommaJoin(unlocked_lv_nodes),
2178 self.extra_lv_nodes = list(extra_lv_nodes)
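  # Editor's note: an illustrative sketch (assumption, not original code) of
  # why extra_lv_nodes exists. For a hypothetical DRBD instance with
  #
  #   inst.disk_template in constants.DTS_INT_MIRROR
  #   inst.all_nodes == ["node-in-this-group", "node-in-other-group"]
  #
  # the secondary "node-in-other-group" is added to self.extra_lv_nodes so its
  # LVs can still be queried when the mirror is verified; it must already be
  # locked (see DeclareLocks), otherwise CheckPrereq raises OpPrereqError.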
2180 def _VerifyNode(self, ninfo, nresult):
2181 """Perform some basic validation on data returned from a node.
2183 - check the result data structure is well formed and has all the
2185 - check ganeti version
2187 @type ninfo: L{objects.Node}
2188 @param ninfo: the node to check
2189 @param nresult: the results from the node
2191 @return: whether overall this call was successful (and we can expect
2192         reasonable values in the response)
2196 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2198 # main result, nresult should be a non-empty dict
2199 test = not nresult or not isinstance(nresult, dict)
2200 _ErrorIf(test, constants.CV_ENODERPC, node,
2201 "unable to verify node: no data returned")
2205 # compares ganeti version
2206 local_version = constants.PROTOCOL_VERSION
2207 remote_version = nresult.get("version", None)
2208 test = not (remote_version and
2209 isinstance(remote_version, (list, tuple)) and
2210 len(remote_version) == 2)
2211 _ErrorIf(test, constants.CV_ENODERPC, node,
2212 "connection to node returned invalid data")
2216 test = local_version != remote_version[0]
2217 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2218 "incompatible protocol versions: master %s,"
2219 " node %s", local_version, remote_version[0])
2223 # node seems compatible, we can actually try to look into its results
2225 # full package version
2226 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2227 constants.CV_ENODEVERSION, node,
2228 "software version mismatch: master %s, node %s",
2229 constants.RELEASE_VERSION, remote_version[1],
2230 code=self.ETYPE_WARNING)
2232 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2233 if ninfo.vm_capable and isinstance(hyp_result, dict):
2234 for hv_name, hv_result in hyp_result.iteritems():
2235 test = hv_result is not None
2236 _ErrorIf(test, constants.CV_ENODEHV, node,
2237 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2239 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2240 if ninfo.vm_capable and isinstance(hvp_result, list):
2241 for item, hv_name, hv_result in hvp_result:
2242 _ErrorIf(True, constants.CV_ENODEHV, node,
2243 "hypervisor %s parameter verify failure (source %s): %s",
2244 hv_name, item, hv_result)
2246 test = nresult.get(constants.NV_NODESETUP,
2247 ["Missing NODESETUP results"])
2248 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2253 def _VerifyNodeTime(self, ninfo, nresult,
2254 nvinfo_starttime, nvinfo_endtime):
2255 """Check the node time.
2257 @type ninfo: L{objects.Node}
2258 @param ninfo: the node to check
2259 @param nresult: the remote results for the node
2260 @param nvinfo_starttime: the start time of the RPC call
2261 @param nvinfo_endtime: the end time of the RPC call
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 ntime = nresult.get(constants.NV_TIME, None)
2269 ntime_merged = utils.MergeTime(ntime)
2270 except (ValueError, TypeError):
2271 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2274 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2275 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2276 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2281 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2282 "Node time diverges by at least %s from master node time",
2285 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2286 """Check the node LVM results.
2288 @type ninfo: L{objects.Node}
2289 @param ninfo: the node to check
2290 @param nresult: the remote results for the node
2291 @param vg_name: the configured VG name
2298 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2300 # checks vg existence and size > 20G
2301 vglist = nresult.get(constants.NV_VGLIST, None)
2303 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2305 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2306 constants.MIN_VG_SIZE)
2307 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2310 pvlist = nresult.get(constants.NV_PVLIST, None)
2311 test = pvlist is None
2312 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2314 # check that ':' is not present in PV names, since it's a
2315 # special character for lvcreate (denotes the range of PEs to
2317 for _, pvname, owner_vg in pvlist:
2318 test = ":" in pvname
2319 _ErrorIf(test, constants.CV_ENODELVM, node,
2320 "Invalid character ':' in PV '%s' of VG '%s'",
2323 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2324 """Check the node bridges.
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the remote results for the node
2329 @param bridges: the expected list of bridges
2336 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2338 missing = nresult.get(constants.NV_BRIDGES, None)
2339 test = not isinstance(missing, list)
2340 _ErrorIf(test, constants.CV_ENODENET, node,
2341 "did not return valid bridge information")
2343 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2344 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2346 def _VerifyNodeUserScripts(self, ninfo, nresult):
2347     """Check the results of user script presence and executability on the node
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2356     test = constants.NV_USERSCRIPTS not in nresult
2357 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2358 "did not return user scripts information")
2360 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2362 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2363 "user scripts not present or not executable: %s" %
2364 utils.CommaJoin(sorted(broken_scripts)))
2366 def _VerifyNodeNetwork(self, ninfo, nresult):
2367 """Check the node network connectivity results.
2369 @type ninfo: L{objects.Node}
2370 @param ninfo: the node to check
2371 @param nresult: the remote results for the node
2375 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2377 test = constants.NV_NODELIST not in nresult
2378 _ErrorIf(test, constants.CV_ENODESSH, node,
2379 "node hasn't returned node ssh connectivity data")
2381 if nresult[constants.NV_NODELIST]:
2382 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2383 _ErrorIf(True, constants.CV_ENODESSH, node,
2384 "ssh communication with node '%s': %s", a_node, a_msg)
2386 test = constants.NV_NODENETTEST not in nresult
2387 _ErrorIf(test, constants.CV_ENODENET, node,
2388 "node hasn't returned node tcp connectivity data")
2390 if nresult[constants.NV_NODENETTEST]:
2391 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2393 _ErrorIf(True, constants.CV_ENODENET, node,
2394 "tcp communication with node '%s': %s",
2395 anode, nresult[constants.NV_NODENETTEST][anode])
2397 test = constants.NV_MASTERIP not in nresult
2398 _ErrorIf(test, constants.CV_ENODENET, node,
2399 "node hasn't returned node master IP reachability data")
2401 if not nresult[constants.NV_MASTERIP]:
2402 if node == self.master_node:
2403 msg = "the master node cannot reach the master IP (not configured?)"
2405 msg = "cannot reach the master IP"
2406 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2408 def _VerifyInstance(self, instance, instanceconfig, node_image,
2410 """Verify an instance.
2412 This function checks to see if the required block devices are
2413 available on the instance's node.
2416 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2417 node_current = instanceconfig.primary_node
2419 node_vol_should = {}
2420 instanceconfig.MapLVsByNode(node_vol_should)
2422 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2423 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2424 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2426 for node in node_vol_should:
2427 n_img = node_image[node]
2428 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2429 # ignore missing volumes on offline or broken nodes
2431 for volume in node_vol_should[node]:
2432 test = volume not in n_img.volumes
2433 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2434 "volume %s missing on node %s", volume, node)
2436 if instanceconfig.admin_state == constants.ADMINST_UP:
2437 pri_img = node_image[node_current]
2438 test = instance not in pri_img.instances and not pri_img.offline
2439 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2440 "instance not running on its primary node %s",
2443 diskdata = [(nname, success, status, idx)
2444 for (nname, disks) in diskstatus.items()
2445 for idx, (success, status) in enumerate(disks)]
2447 for nname, success, bdev_status, idx in diskdata:
2448 # the 'ghost node' construction in Exec() ensures that we have a
2450 snode = node_image[nname]
2451 bad_snode = snode.ghost or snode.offline
2452 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2453 not success and not bad_snode,
2454 constants.CV_EINSTANCEFAULTYDISK, instance,
2455 "couldn't retrieve status for disk/%s on %s: %s",
2456 idx, nname, bdev_status)
2457 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2458 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2459 constants.CV_EINSTANCEFAULTYDISK, instance,
2460 "disk/%s on %s is faulty", idx, nname)
2462 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2463 """Verify if there are any unknown volumes in the cluster.
2465 The .os, .swap and backup volumes are ignored. All other volumes are
2466 reported as unknown.
2468 @type reserved: L{ganeti.utils.FieldSet}
2469 @param reserved: a FieldSet of reserved volume names
2472 for node, n_img in node_image.items():
2473 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2474 self.all_node_info[node].group != self.group_uuid):
2475 # skip non-healthy nodes
2477 for volume in n_img.volumes:
2478 test = ((node not in node_vol_should or
2479 volume not in node_vol_should[node]) and
2480 not reserved.Matches(volume))
2481 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2482 "volume %s is unknown", volume)
2484 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2485 """Verify N+1 Memory Resilience.
2487 Check that if one single node dies we can still start all the
2488 instances it was primary for.
2491 cluster_info = self.cfg.GetClusterInfo()
2492 for node, n_img in node_image.items():
2493 # This code checks that every node which is now listed as
2494 # secondary has enough memory to host all instances it is
2495       # supposed to, should a single other node in the cluster fail.
2496 # FIXME: not ready for failover to an arbitrary node
2497 # FIXME: does not support file-backed instances
2498 # WARNING: we currently take into account down instances as well
2499 # as up ones, considering that even if they're down someone
2500 # might want to start them even in the event of a node failure.
2501 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2502 # we're skipping nodes marked offline and nodes in other groups from
2503 # the N+1 warning, since most likely we don't have good memory
2504       # information from them; we already list instances living on such
2505 # nodes, and that's enough warning
2507 #TODO(dynmem): also consider ballooning out other instances
2508 for prinode, instances in n_img.sbp.items():
2510 for instance in instances:
2511 bep = cluster_info.FillBE(instance_cfg[instance])
2512 if bep[constants.BE_AUTO_BALANCE]:
2513 needed_mem += bep[constants.BE_MINMEM]
2514 test = n_img.mfree < needed_mem
2515 self._ErrorIf(test, constants.CV_ENODEN1, node,
2516                       "not enough memory to accommodate instance failovers"
2517 " should node %s fail (%dMiB needed, %dMiB available)",
2518 prinode, needed_mem, n_img.mfree)
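  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the N+1 computation above for a hypothetical secondary node: the memory it
  # must provide if primary node "nodeA" fails is
  #
  #   needed_mem = sum(bep[constants.BE_MINMEM]
  #                    for bep in (cluster_info.FillBE(instance_cfg[name])
  #                                for name in n_img.sbp["nodeA"])
  #                    if bep[constants.BE_AUTO_BALANCE])
  #
  # and CV_ENODEN1 is reported when n_img.mfree < needed_mem.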
2521 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2522 (files_all, files_opt, files_mc, files_vm)):
2523 """Verifies file checksums collected from all nodes.
2525 @param errorif: Callback for reporting errors
2526 @param nodeinfo: List of L{objects.Node} objects
2527 @param master_node: Name of master node
2528 @param all_nvinfo: RPC results
2531 # Define functions determining which nodes to consider for a file
2534 (files_mc, lambda node: (node.master_candidate or
2535 node.name == master_node)),
2536 (files_vm, lambda node: node.vm_capable),
2539 # Build mapping from filename to list of nodes which should have the file
2541 for (files, fn) in files2nodefn:
2543 filenodes = nodeinfo
2545 filenodes = filter(fn, nodeinfo)
2546 nodefiles.update((filename,
2547 frozenset(map(operator.attrgetter("name"), filenodes)))
2548 for filename in files)
2550 assert set(nodefiles) == (files_all | files_mc | files_vm)
2552 fileinfo = dict((filename, {}) for filename in nodefiles)
2553 ignore_nodes = set()
2555 for node in nodeinfo:
2557 ignore_nodes.add(node.name)
2560 nresult = all_nvinfo[node.name]
2562 if nresult.fail_msg or not nresult.payload:
2565 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2567 test = not (node_files and isinstance(node_files, dict))
2568 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2569 "Node did not return file checksum data")
2571 ignore_nodes.add(node.name)
2574 # Build per-checksum mapping from filename to nodes having it
2575 for (filename, checksum) in node_files.items():
2576 assert filename in nodefiles
2577 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2579 for (filename, checksums) in fileinfo.items():
2580 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2582 # Nodes having the file
2583 with_file = frozenset(node_name
2584 for nodes in fileinfo[filename].values()
2585 for node_name in nodes) - ignore_nodes
2587 expected_nodes = nodefiles[filename] - ignore_nodes
2589 # Nodes missing file
2590 missing_file = expected_nodes - with_file
2592 if filename in files_opt:
2594 errorif(missing_file and missing_file != expected_nodes,
2595 constants.CV_ECLUSTERFILECHECK, None,
2596 "File %s is optional, but it must exist on all or no"
2597 " nodes (not found on %s)",
2598 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2600 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2601 "File %s is missing from node(s) %s", filename,
2602 utils.CommaJoin(utils.NiceSort(missing_file)))
2604 # Warn if a node has a file it shouldn't
2605 unexpected = with_file - expected_nodes
2607 constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s should not exist on node(s) %s",
2609 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2611 # See if there are multiple versions of the file
2612 test = len(checksums) > 1
2614 variants = ["variant %s on %s" %
2615 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2616 for (idx, (checksum, nodes)) in
2617 enumerate(sorted(checksums.items()))]
2621 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2622 "File %s found with %s different checksums (%s)",
2623 filename, len(checksums), "; ".join(variants))
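  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the per-file structure built above, using hypothetical names:
  #
  #   fileinfo["some-config-file"] = {
  #     "0a1b...": set(["node1", "node2"]),   # checksum -> nodes reporting it
  #     "9f8e...": set(["node3"]),
  #   }
  #
  # More than one checksum for the same file triggers the
  # CV_ECLUSTERFILECHECK "different checksums" error, listing one variant per
  # checksum.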
2625 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2627     """Verifies the node DRBD status.
2629 @type ninfo: L{objects.Node}
2630 @param ninfo: the node to check
2631 @param nresult: the remote results for the node
2632 @param instanceinfo: the dict of instances
2633 @param drbd_helper: the configured DRBD usermode helper
2634 @param drbd_map: the DRBD map as returned by
2635 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2639 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2642 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2643       test = (helper_result is None)
2644 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2645 "no drbd usermode helper returned")
2647 status, payload = helper_result
2649 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2650 "drbd usermode helper check unsuccessful: %s", payload)
2651 test = status and (payload != drbd_helper)
2652 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2653 "wrong drbd usermode helper: %s", payload)
2655 # compute the DRBD minors
2657 for minor, instance in drbd_map[node].items():
2658 test = instance not in instanceinfo
2659 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2660 "ghost instance '%s' in temporary DRBD map", instance)
2661 # ghost instance should not be running, but otherwise we
2662 # don't give double warnings (both ghost instance and
2663 # unallocated minor in use)
2665 node_drbd[minor] = (instance, False)
2667 instance = instanceinfo[instance]
2668 node_drbd[minor] = (instance.name,
2669 instance.admin_state == constants.ADMINST_UP)
2671 # and now check them
2672 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2673 test = not isinstance(used_minors, (tuple, list))
2674 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2675 "cannot parse drbd status file: %s", str(used_minors))
2677 # we cannot check drbd status
2680 for minor, (iname, must_exist) in node_drbd.items():
2681 test = minor not in used_minors and must_exist
2682 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2683 "drbd minor %d of instance %s is not active", minor, iname)
2684 for minor in used_minors:
2685 test = minor not in node_drbd
2686 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2687 "unallocated drbd minor %d is in use", minor)
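  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the node_drbd map built above, using hypothetical instance names:
  #
  #   node_drbd = {0: ("inst1", True),   # instance administratively up
  #                1: ("inst2", False),  # instance down, minor may be unused
  #                2: ("ghost1", False)} # unknown instance from the DRBD map
  #
  # It is then compared against the minors the node reports as actually in use
  # (constants.NV_DRBDLIST).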
2689 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2690 """Builds the node OS structures.
2692 @type ninfo: L{objects.Node}
2693 @param ninfo: the node to check
2694 @param nresult: the remote results for the node
2695 @param nimg: the node image object
2699 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2701 remote_os = nresult.get(constants.NV_OSLIST, None)
2702 test = (not isinstance(remote_os, list) or
2703 not compat.all(isinstance(v, list) and len(v) == 7
2704 for v in remote_os))
2706 _ErrorIf(test, constants.CV_ENODEOS, node,
2707 "node hasn't returned valid OS data")
2716 for (name, os_path, status, diagnose,
2717 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2719 if name not in os_dict:
2722 # parameters is a list of lists instead of list of tuples due to
2723 # JSON lacking a real tuple type, fix it:
2724 parameters = [tuple(v) for v in parameters]
2725 os_dict[name].append((os_path, status, diagnose,
2726 set(variants), set(parameters), set(api_ver)))
2728 nimg.oslist = os_dict
2730 def _VerifyNodeOS(self, ninfo, nimg, base):
2731 """Verifies the node OS list.
2733 @type ninfo: L{objects.Node}
2734 @param ninfo: the node to check
2735 @param nimg: the node image object
2736 @param base: the 'template' node we match against (e.g. from the master)
2740 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2742 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2744 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2745 for os_name, os_data in nimg.oslist.items():
2746 assert os_data, "Empty OS status for OS %s?!" % os_name
2747 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2748 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2749 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2750 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2751 "OS '%s' has multiple entries (first one shadows the rest): %s",
2752 os_name, utils.CommaJoin([v[0] for v in os_data]))
2753 # comparisons with the 'base' image
2754 test = os_name not in base.oslist
2755 _ErrorIf(test, constants.CV_ENODEOS, node,
2756 "Extra OS %s not present on reference node (%s)",
2760 assert base.oslist[os_name], "Base node has empty OS status?"
2761 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2763 # base OS is invalid, skipping
2765 for kind, a, b in [("API version", f_api, b_api),
2766 ("variants list", f_var, b_var),
2767 ("parameters", beautify_params(f_param),
2768 beautify_params(b_param))]:
2769 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2770 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2771 kind, os_name, base.name,
2772 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2774 # check any missing OSes
2775 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2776 _ErrorIf(missing, constants.CV_ENODEOS, node,
2777 "OSes present on reference node %s but missing on this node: %s",
2778 base.name, utils.CommaJoin(missing))
2780 def _VerifyOob(self, ninfo, nresult):
2781 """Verifies out of band functionality of a node.
2783 @type ninfo: L{objects.Node}
2784 @param ninfo: the node to check
2785 @param nresult: the remote results for the node
2789 # We just have to verify the paths on master and/or master candidates
2790 # as the oob helper is invoked on the master
2791 if ((ninfo.master_candidate or ninfo.master_capable) and
2792 constants.NV_OOB_PATHS in nresult):
2793 for path_result in nresult[constants.NV_OOB_PATHS]:
2794 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2796 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2797 """Verifies and updates the node volume data.
2799 This function will update a L{NodeImage}'s internal structures
2800 with data from the remote call.
2802 @type ninfo: L{objects.Node}
2803 @param ninfo: the node to check
2804 @param nresult: the remote results for the node
2805 @param nimg: the node image object
2806 @param vg_name: the configured VG name
2810 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2812 nimg.lvm_fail = True
2813 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2816 elif isinstance(lvdata, basestring):
2817 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2818 utils.SafeEncode(lvdata))
2819 elif not isinstance(lvdata, dict):
2820 _ErrorIf(True, constants.CV_ENODELVM, node,
2821 "rpc call to node failed (lvlist)")
2823 nimg.volumes = lvdata
2824 nimg.lvm_fail = False
2826 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2827 """Verifies and updates the node instance list.
2829 If the listing was successful, then updates this node's instance
2830 list. Otherwise, it marks the RPC call as failed for the instance
2833 @type ninfo: L{objects.Node}
2834 @param ninfo: the node to check
2835 @param nresult: the remote results for the node
2836 @param nimg: the node image object
2839 idata = nresult.get(constants.NV_INSTANCELIST, None)
2840 test = not isinstance(idata, list)
2841 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2842 "rpc call to node failed (instancelist): %s",
2843 utils.SafeEncode(str(idata)))
2845 nimg.hyp_fail = True
2847 nimg.instances = idata
2849 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2850 """Verifies and computes a node information map
2852 @type ninfo: L{objects.Node}
2853 @param ninfo: the node to check
2854 @param nresult: the remote results for the node
2855 @param nimg: the node image object
2856 @param vg_name: the configured VG name
2860 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2862 # try to read free memory (from the hypervisor)
2863 hv_info = nresult.get(constants.NV_HVINFO, None)
2864 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2865 _ErrorIf(test, constants.CV_ENODEHV, node,
2866 "rpc call to node failed (hvinfo)")
2869 nimg.mfree = int(hv_info["memory_free"])
2870 except (ValueError, TypeError):
2871 _ErrorIf(True, constants.CV_ENODERPC, node,
2872 "node returned invalid nodeinfo, check hypervisor")
2874 # FIXME: devise a free space model for file based instances as well
2875 if vg_name is not None:
2876 test = (constants.NV_VGLIST not in nresult or
2877 vg_name not in nresult[constants.NV_VGLIST])
2878 _ErrorIf(test, constants.CV_ENODELVM, node,
2879 "node didn't return data for the volume group '%s'"
2880 " - it is either missing or broken", vg_name)
2883 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2884 except (ValueError, TypeError):
2885 _ErrorIf(True, constants.CV_ENODERPC, node,
2886 "node returned invalid LVM info, check LVM status")
2888 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2889 """Gets per-disk status information for all instances.
2891 @type nodelist: list of strings
2892 @param nodelist: Node names
2893 @type node_image: dict of (name, L{objects.Node})
2894 @param node_image: Node objects
2895 @type instanceinfo: dict of (name, L{objects.Instance})
2896 @param instanceinfo: Instance objects
2897     @rtype: {instance: {node: [(success, payload)]}}
2898 @return: a dictionary of per-instance dictionaries with nodes as
2899 keys and disk information as values; the disk information is a
2900 list of tuples (success, payload)
2903 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2906 node_disks_devonly = {}
2907 diskless_instances = set()
2908 diskless = constants.DT_DISKLESS
2910 for nname in nodelist:
2911 node_instances = list(itertools.chain(node_image[nname].pinst,
2912 node_image[nname].sinst))
2913 diskless_instances.update(inst for inst in node_instances
2914 if instanceinfo[inst].disk_template == diskless)
2915 disks = [(inst, disk)
2916 for inst in node_instances
2917 for disk in instanceinfo[inst].disks]
2920 # No need to collect data
2923 node_disks[nname] = disks
2925 # Creating copies as SetDiskID below will modify the objects and that can
2926 # lead to incorrect data returned from nodes
2927 devonly = [dev.Copy() for (_, dev) in disks]
2930 self.cfg.SetDiskID(dev, nname)
2932 node_disks_devonly[nname] = devonly
2934 assert len(node_disks) == len(node_disks_devonly)
2936 # Collect data from all nodes with disks
2937 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2940 assert len(result) == len(node_disks)
2944 for (nname, nres) in result.items():
2945 disks = node_disks[nname]
2948 # No data from this node
2949 data = len(disks) * [(False, "node offline")]
2952 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2953 "while getting disk information: %s", msg)
2955 # No data from this node
2956 data = len(disks) * [(False, msg)]
2959 for idx, i in enumerate(nres.payload):
2960 if isinstance(i, (tuple, list)) and len(i) == 2:
2963 logging.warning("Invalid result from node %s, entry %d: %s",
2965 data.append((False, "Invalid result from the remote node"))
2967 for ((inst, _), status) in zip(disks, data):
2968 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2970 # Add empty entries for diskless instances.
2971 for inst in diskless_instances:
2972 assert inst not in instdisk
2975 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2976 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2977 compat.all(isinstance(s, (tuple, list)) and
2978 len(s) == 2 for s in statuses)
2979 for inst, nnames in instdisk.items()
2980 for nname, statuses in nnames.items())
2981 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2986 def _SshNodeSelector(group_uuid, all_nodes):
2987 """Create endless iterators for all potential SSH check hosts.
2990 nodes = [node for node in all_nodes
2991 if (node.group != group_uuid and
2993 keyfunc = operator.attrgetter("group")
2995 return map(itertools.cycle,
2996 [sorted(map(operator.attrgetter("name"), names))
2997 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3001 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3002 """Choose which nodes should talk to which other nodes.
3004 We will make nodes contact all nodes in their group, and one node from
3007 @warning: This algorithm has a known issue if one node group is much
3008 smaller than others (e.g. just one node). In such a case all other
3009 nodes will talk to the single node.
3012 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3013 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3015 return (online_nodes,
3016 dict((name, sorted([i.next() for i in sel]))
3017 for name in online_nodes))
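  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the value returned above for a group with online nodes ["n1", "n2"] and
  # two other hypothetical groups:
  #
  #   (["n1", "n2"],
  #    {"n1": ["groupA-node1", "groupB-node1"],
  #     "n2": ["groupA-node2", "groupB-node2"]})
  #
  # i.e. every online node of this group is asked to SSH-check one node from
  # each other group, cycling through them so the checks are spread out.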
3019 def BuildHooksEnv(self):
3022     Cluster-Verify hooks are run only in the post phase; a hook failure is
3023     logged in the verify output and makes the verification fail.
3027 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3030 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3031 for node in self.my_node_info.values())
3035 def BuildHooksNodes(self):
3036 """Build hooks nodes.
3039 return ([], self.my_node_names)
3041 def Exec(self, feedback_fn):
3042     """Verify integrity of the node group, performing various tests on nodes.
3045 # This method has too many local variables. pylint: disable=R0914
3046 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3048 if not self.my_node_names:
3050 feedback_fn("* Empty node group, skipping verification")
3054 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3055 verbose = self.op.verbose
3056 self._feedback_fn = feedback_fn
3058 vg_name = self.cfg.GetVGName()
3059 drbd_helper = self.cfg.GetDRBDHelper()
3060 cluster = self.cfg.GetClusterInfo()
3061 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3062 hypervisors = cluster.enabled_hypervisors
3063 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3065 i_non_redundant = [] # Non redundant instances
3066 i_non_a_balanced = [] # Non auto-balanced instances
3067 i_offline = 0 # Count of offline instances
3068 n_offline = 0 # Count of offline nodes
3069 n_drained = 0 # Count of nodes being drained
3070 node_vol_should = {}
3072 # FIXME: verify OS list
3075 filemap = _ComputeAncillaryFiles(cluster, False)
3077 # do local checksums
3078 master_node = self.master_node = self.cfg.GetMasterNode()
3079 master_ip = self.cfg.GetMasterIP()
3081 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3084 if self.cfg.GetUseExternalMipScript():
3085 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3087 node_verify_param = {
3088 constants.NV_FILELIST:
3089 utils.UniqueSequence(filename
3090 for files in filemap
3091 for filename in files),
3092 constants.NV_NODELIST:
3093 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3094 self.all_node_info.values()),
3095 constants.NV_HYPERVISOR: hypervisors,
3096 constants.NV_HVPARAMS:
3097 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3098 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3099 for node in node_data_list
3100 if not node.offline],
3101 constants.NV_INSTANCELIST: hypervisors,
3102 constants.NV_VERSION: None,
3103 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3104 constants.NV_NODESETUP: None,
3105 constants.NV_TIME: None,
3106 constants.NV_MASTERIP: (master_node, master_ip),
3107 constants.NV_OSLIST: None,
3108 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3109 constants.NV_USERSCRIPTS: user_scripts,
3112 if vg_name is not None:
3113 node_verify_param[constants.NV_VGLIST] = None
3114 node_verify_param[constants.NV_LVLIST] = vg_name
3115 node_verify_param[constants.NV_PVLIST] = [vg_name]
3116 node_verify_param[constants.NV_DRBDLIST] = None
3119 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3122 # FIXME: this needs to be changed per node-group, not cluster-wide
3124 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3125 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3126 bridges.add(default_nicpp[constants.NIC_LINK])
3127 for instance in self.my_inst_info.values():
3128 for nic in instance.nics:
3129 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3130 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131 bridges.add(full_nic[constants.NIC_LINK])
3134 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3136 # Build our expected cluster state
3137 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3139 vm_capable=node.vm_capable))
3140 for node in node_data_list)
3144 for node in self.all_node_info.values():
3145 path = _SupportsOob(self.cfg, node)
3146 if path and path not in oob_paths:
3147 oob_paths.append(path)
3150 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3152 for instance in self.my_inst_names:
3153 inst_config = self.my_inst_info[instance]
3155 for nname in inst_config.all_nodes:
3156 if nname not in node_image:
3157 gnode = self.NodeImage(name=nname)
3158 gnode.ghost = (nname not in self.all_node_info)
3159 node_image[nname] = gnode
3161 inst_config.MapLVsByNode(node_vol_should)
3163 pnode = inst_config.primary_node
3164 node_image[pnode].pinst.append(instance)
3166 for snode in inst_config.secondary_nodes:
3167 nimg = node_image[snode]
3168 nimg.sinst.append(instance)
3169 if pnode not in nimg.sbp:
3170 nimg.sbp[pnode] = []
3171 nimg.sbp[pnode].append(instance)
3173 # At this point, we have the in-memory data structures complete,
3174 # except for the runtime information, which we'll gather next
3176 # Due to the way our RPC system works, exact response times cannot be
3177 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3178 # time before and after executing the request, we can at least have a time
3180 nvinfo_starttime = time.time()
3181 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3183 self.cfg.GetClusterName())
3184 nvinfo_endtime = time.time()
3186 if self.extra_lv_nodes and vg_name is not None:
3188 self.rpc.call_node_verify(self.extra_lv_nodes,
3189 {constants.NV_LVLIST: vg_name},
3190 self.cfg.GetClusterName())
3192 extra_lv_nvinfo = {}
3194 all_drbd_map = self.cfg.ComputeDRBDMap()
3196 feedback_fn("* Gathering disk information (%s nodes)" %
3197 len(self.my_node_names))
3198 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3201 feedback_fn("* Verifying configuration file consistency")
3203 # If not all nodes are being checked, we need to make sure the master node
3204 # and a non-checked vm_capable node are in the list.
3205 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3207 vf_nvinfo = all_nvinfo.copy()
3208 vf_node_info = list(self.my_node_info.values())
3209 additional_nodes = []
3210 if master_node not in self.my_node_info:
3211 additional_nodes.append(master_node)
3212 vf_node_info.append(self.all_node_info[master_node])
3213 # Add the first vm_capable node we find which is not included
3214 for node in absent_nodes:
3215 nodeinfo = self.all_node_info[node]
3216 if nodeinfo.vm_capable and not nodeinfo.offline:
3217 additional_nodes.append(node)
3218 vf_node_info.append(self.all_node_info[node])
3220 key = constants.NV_FILELIST
3221 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3222 {key: node_verify_param[key]},
3223 self.cfg.GetClusterName()))
3225 vf_nvinfo = all_nvinfo
3226 vf_node_info = self.my_node_info.values()
3228 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3230 feedback_fn("* Verifying node status")
3234 for node_i in node_data_list:
3236 nimg = node_image[node]
3240 feedback_fn("* Skipping offline node %s" % (node,))
3244 if node == master_node:
3246 elif node_i.master_candidate:
3247 ntype = "master candidate"
3248 elif node_i.drained:
3254 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3256 msg = all_nvinfo[node].fail_msg
3257 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3260 nimg.rpc_fail = True
3263 nresult = all_nvinfo[node].payload
3265 nimg.call_ok = self._VerifyNode(node_i, nresult)
3266 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3267 self._VerifyNodeNetwork(node_i, nresult)
3268 self._VerifyNodeUserScripts(node_i, nresult)
3269 self._VerifyOob(node_i, nresult)
3272 self._VerifyNodeLVM(node_i, nresult, vg_name)
3273 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3276 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3277 self._UpdateNodeInstances(node_i, nresult, nimg)
3278 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3279 self._UpdateNodeOS(node_i, nresult, nimg)
3281 if not nimg.os_fail:
3282 if refos_img is None:
3284 self._VerifyNodeOS(node_i, nimg, refos_img)
3285 self._VerifyNodeBridges(node_i, nresult, bridges)
3287       # Check whether all running instances are primary for the node. (This
3288 # can no longer be done from _VerifyInstance below, since some of the
3289 # wrong instances could be from other node groups.)
3290 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3292 for inst in non_primary_inst:
3293 # FIXME: investigate best way to handle offline insts
3294 if inst.admin_state == constants.ADMINST_OFFLINE:
3296 feedback_fn("* Skipping offline instance %s" % inst.name)
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332       # templates with more than one secondary so that situation is not well
      # supported either.
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyses the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437     @param feedback_fn: function used to send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460 # No need to investigate payload if node is offline or gave
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476   """Verifies the cluster disk status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534 # This will only lock the nodes in the group to be verified which contain
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566 which need activate-disks, dict of instance: (node, volume) for
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
3604 return (res_nodes, list(res_instances), res_missing)
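  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the tuple returned above, using hypothetical names:
  #
  #   ({"node3": "Error while listing LVs"},        # node name -> error message
  #    ["inst2"],                                   # instances needing activate-disks
  #    {"inst5": [["node1", "xenvg/disk0"]]})       # instance -> missing (node, LV)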
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608   """Verifies the cluster disk sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652 children have smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
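  # Editor's note: an illustrative sketch (assumption, not original code): for
  # a hypothetical DRBD8 disk whose data child was recorded 128 MiB too small,
  #
  #   disk.size              == 10240
  #   disk.children[0].size  == 10112
  #   self._EnsureChildSizes(disk)   # grows the child to 10240, returns True
  #
  # the caller then writes the updated configuration back (see Exec below).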
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701         logging.warning("Invalid result from node %s: len(dskl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3704 node)
3705 continue
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3707 if size is None:
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
3710 continue
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
3720 disk.size = size
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3727 return changed
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3737 """Build hooks env.
3739 """
3740 return {
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3743 }
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
3779 new_ip = self.ip
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3785 master_params, ems)
3786 result.Raise("Could not disable the master role")
3788 try:
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3797 try:
3798 node_list.remove(master_params.name)
3799 except ValueError:
3800 pass
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3802 finally:
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3805 master_params, ems)
3806 msg = result.fail_msg
3807 if msg:
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3811 return clustername
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3825 try:
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3829 ip_family, errors.ECODE_INVAL)
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3832 (netmask), errors.ECODE_INVAL)
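# Illustrative sketch (editor's addition): the master netmask is handled as
# a CIDR prefix length and validated against the cluster's primary IP
# family. Hypothetical example on an IPv4 cluster:
#
#   _ValidateNetmask(self.cfg, 24)    # accepted
#   _ValidateNetmask(self.cfg, 255)   # raises errors.OpPrereqError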
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 def ExpandNames(self):
3864 # FIXME: in the future maybe other cluster params won't require checking on
3865 # all nodes to be modified.
3866 self.needed_locks = {
3867 locking.LEVEL_NODE: locking.ALL_SET,
3868 locking.LEVEL_INSTANCE: locking.ALL_SET,
3869 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3871 self.share_locks = {
3872 locking.LEVEL_NODE: 1,
3873 locking.LEVEL_INSTANCE: 1,
3874 locking.LEVEL_NODEGROUP: 1,
3877 def BuildHooksEnv(self):
3878 """Build hooks env.
3880 """
3881 return {
3882 "OP_TARGET": self.cfg.GetClusterName(),
3883 "NEW_VG_NAME": self.op.vg_name,
3884 }
3886 def BuildHooksNodes(self):
3887 """Build hooks nodes.
3890 mn = self.cfg.GetMasterNode()
3891 return ([mn], [mn])
3893 def CheckPrereq(self):
3894 """Check prerequisites.
3896 This checks whether the given params don't conflict and
3897 if the given volume group is valid.
3900 if self.op.vg_name is not None and not self.op.vg_name:
3901 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3902 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3903 " instances exist", errors.ECODE_INVAL)
3905 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3907 raise errors.OpPrereqError("Cannot disable drbd helper while"
3908 " drbd-based instances exist",
3911 node_list = self.owned_locks(locking.LEVEL_NODE)
3913 # if vg_name not None, checks given volume group on all nodes
3914 if self.op.vg_name:
3915 vglist = self.rpc.call_vg_list(node_list)
3916 for node in node_list:
3917 msg = vglist[node].fail_msg
3918 if msg:
3919 # ignoring down node
3920 self.LogWarning("Error while gathering data on node %s"
3921 " (ignoring node): %s", node, msg)
3922 continue
3923 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3924 self.op.vg_name,
3925 constants.MIN_VG_SIZE)
3926 if vgstatus:
3927 raise errors.OpPrereqError("Error on node '%s': %s" %
3928 (node, vgstatus), errors.ECODE_ENVIRON)
3930 if self.op.drbd_helper:
3931 # checks given drbd helper on all nodes
3932 helpers = self.rpc.call_drbd_helper(node_list)
3933 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3934 if ninfo.offline:
3935 self.LogInfo("Not checking drbd helper on offline node %s", node)
3936 continue
3937 msg = helpers[node].fail_msg
3938 if msg:
3939 raise errors.OpPrereqError("Error checking drbd helper on node"
3940 " '%s': %s" % (node, msg),
3941 errors.ECODE_ENVIRON)
3942 node_helper = helpers[node].payload
3943 if node_helper != self.op.drbd_helper:
3944 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3945 (node, node_helper), errors.ECODE_ENVIRON)
3947 self.cluster = cluster = self.cfg.GetClusterInfo()
3948 # validate params changes
3949 if self.op.beparams:
3950 objects.UpgradeBeParams(self.op.beparams)
3951 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3952 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3954 if self.op.ndparams:
3955 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3956 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3958 # TODO: we need a more general way to handle resetting
3959 # cluster-level parameters to default values
3960 if self.new_ndparams["oob_program"] == "":
3961 self.new_ndparams["oob_program"] = \
3962 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3964 if self.op.hv_state:
3965 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3966 self.cluster.hv_state_static)
3967 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3968 for hv, values in new_hv_state.items())
3970 if self.op.disk_state:
3971 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3972 self.cluster.disk_state_static)
3973 self.new_disk_state = \
3974 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3975 for name, values in svalues.items()))
3976 for storage, svalues in new_disk_state.items())
3978 if self.op.ipolicy:
3979 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3980 group_policy=False)
3982 all_instances = self.cfg.GetAllInstancesInfo().values()
3983 violations = set()
3984 for group in self.cfg.GetAllNodeGroupsInfo().values():
3985 instances = frozenset([inst for inst in all_instances
3986 if compat.any(node in group.members
3987 for node in inst.all_nodes)])
3988 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3989 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3990 group),
3991 new_ipolicy, instances)
3993 violations.update(new)
3995 if violations:
3996 self.LogWarning("After the ipolicy change the following instances"
3997 " violate them: %s",
3998 utils.CommaJoin(utils.NiceSort(violations)))
4000 if self.op.nicparams:
4001 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4002 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4003 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4005 nic_errors = []
4006 # check all instances for consistency
4007 for instance in self.cfg.GetAllInstancesInfo().values():
4008 for nic_idx, nic in enumerate(instance.nics):
4009 params_copy = copy.deepcopy(nic.nicparams)
4010 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4012 # check parameter syntax
4013 try:
4014 objects.NIC.CheckParameterSyntax(params_filled)
4015 except errors.ConfigurationError, err:
4016 nic_errors.append("Instance %s, nic/%d: %s" %
4017 (instance.name, nic_idx, err))
4019 # if we're moving instances to routed, check that they have an ip
4020 target_mode = params_filled[constants.NIC_MODE]
4021 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4022 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4023 " address" % (instance.name, nic_idx))
4024 if nic_errors:
4025 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4026 "\n".join(nic_errors))
4028 # hypervisor list/parameters
4029 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4030 if self.op.hvparams:
4031 for hv_name, hv_dict in self.op.hvparams.items():
4032 if hv_name not in self.new_hvparams:
4033 self.new_hvparams[hv_name] = hv_dict
4034 else:
4035 self.new_hvparams[hv_name].update(hv_dict)
4037 # disk template parameters
4038 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4039 if self.op.diskparams:
4040 for dt_name, dt_params in self.op.diskparams.items():
4041 if dt_name not in self.new_diskparams:
4042 self.new_diskparams[dt_name] = dt_params
4043 else:
4044 self.new_diskparams[dt_name].update(dt_params)
4046 # os hypervisor parameters
4047 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4048 if self.op.os_hvp:
4049 for os_name, hvs in self.op.os_hvp.items():
4050 if os_name not in self.new_os_hvp:
4051 self.new_os_hvp[os_name] = hvs
4052 else:
4053 for hv_name, hv_dict in hvs.items():
4054 if hv_name not in self.new_os_hvp[os_name]:
4055 self.new_os_hvp[os_name][hv_name] = hv_dict
4056 else:
4057 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4060 self.new_osp = objects.FillDict(cluster.osparams, {})
4061 if self.op.osparams:
4062 for os_name, osp in self.op.osparams.items():
4063 if os_name not in self.new_osp:
4064 self.new_osp[os_name] = {}
4066 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4067 use_none=True)
4069 if not self.new_osp[os_name]:
4070 # we removed all parameters
4071 del self.new_osp[os_name]
4072 else:
4073 # check the parameter validity (remote check)
4074 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4075 os_name, self.new_osp[os_name])
4077 # changes to the hypervisor list
4078 if self.op.enabled_hypervisors is not None:
4079 self.hv_list = self.op.enabled_hypervisors
4080 for hv in self.hv_list:
4081 # if the hypervisor doesn't already exist in the cluster
4082 # hvparams, we initialize it to empty, and then (in both
4083 # cases) we make sure to fill the defaults, as we might not
4084 # have a complete defaults list if the hypervisor wasn't
4086 if hv not in new_hvp:
4087 new_hvp[hv] = {}
4088 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4089 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4090 else:
4091 self.hv_list = cluster.enabled_hypervisors
4093 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4094 # either the enabled list has changed, or the parameters have, validate
4095 for hv_name, hv_params in self.new_hvparams.items():
4096 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4097 (self.op.enabled_hypervisors and
4098 hv_name in self.op.enabled_hypervisors)):
4099 # either this is a new hypervisor, or its parameters have changed
4100 hv_class = hypervisor.GetHypervisor(hv_name)
4101 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4102 hv_class.CheckParameterSyntax(hv_params)
4103 _CheckHVParams(self, node_list, hv_name, hv_params)
4105 if self.op.os_hvp:
4106 # no need to check any newly-enabled hypervisors, since the
4107 # defaults have already been checked in the above code-block
4108 for os_name, os_hvp in self.new_os_hvp.items():
4109 for hv_name, hv_params in os_hvp.items():
4110 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4111 # we need to fill in the new os_hvp on top of the actual hv_p
4112 cluster_defaults = self.new_hvparams.get(hv_name, {})
4113 new_osp = objects.FillDict(cluster_defaults, hv_params)
4114 hv_class = hypervisor.GetHypervisor(hv_name)
4115 hv_class.CheckParameterSyntax(new_osp)
4116 _CheckHVParams(self, node_list, hv_name, new_osp)
4118 if self.op.default_iallocator:
4119 alloc_script = utils.FindFile(self.op.default_iallocator,
4120 constants.IALLOCATOR_SEARCH_PATH,
4121 os.path.isfile)
4122 if alloc_script is None:
4123 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4124 " specified" % self.op.default_iallocator,
4125 errors.ECODE_INVAL)
4127 def Exec(self, feedback_fn):
4128 """Change the parameters of the cluster.
4131 if self.op.vg_name is not None:
4132 new_volume = self.op.vg_name
4133 if not new_volume:
4134 new_volume = None
4135 if new_volume != self.cfg.GetVGName():
4136 self.cfg.SetVGName(new_volume)
4137 else:
4138 feedback_fn("Cluster LVM configuration already in desired"
4139 " state, not changing")
4140 if self.op.drbd_helper is not None:
4141 new_helper = self.op.drbd_helper
4142 if not new_helper:
4143 new_helper = None
4144 if new_helper != self.cfg.GetDRBDHelper():
4145 self.cfg.SetDRBDHelper(new_helper)
4146 else:
4147 feedback_fn("Cluster DRBD helper already in desired state,"
4148 " not changing")
4149 if self.op.hvparams:
4150 self.cluster.hvparams = self.new_hvparams
4151 if self.op.os_hvp:
4152 self.cluster.os_hvp = self.new_os_hvp
4153 if self.op.enabled_hypervisors is not None:
4154 self.cluster.hvparams = self.new_hvparams
4155 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4156 if self.op.beparams:
4157 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4158 if self.op.nicparams:
4159 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4160 if self.op.ipolicy:
4161 self.cluster.ipolicy = self.new_ipolicy
4162 if self.op.osparams:
4163 self.cluster.osparams = self.new_osp
4164 if self.op.ndparams:
4165 self.cluster.ndparams = self.new_ndparams
4166 if self.op.diskparams:
4167 self.cluster.diskparams = self.new_diskparams
4168 if self.op.hv_state:
4169 self.cluster.hv_state_static = self.new_hv_state
4170 if self.op.disk_state:
4171 self.cluster.disk_state_static = self.new_disk_state
4173 if self.op.candidate_pool_size is not None:
4174 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4175 # we need to update the pool size here, otherwise the save will fail
4176 _AdjustCandidatePool(self, [])
4178 if self.op.maintain_node_health is not None:
4179 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4180 feedback_fn("Note: CONFD was disabled at build time, node health"
4181 " maintenance is not useful (still enabling it)")
4182 self.cluster.maintain_node_health = self.op.maintain_node_health
4184 if self.op.prealloc_wipe_disks is not None:
4185 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4187 if self.op.add_uids is not None:
4188 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4190 if self.op.remove_uids is not None:
4191 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4193 if self.op.uid_pool is not None:
4194 self.cluster.uid_pool = self.op.uid_pool
4196 if self.op.default_iallocator is not None:
4197 self.cluster.default_iallocator = self.op.default_iallocator
4199 if self.op.reserved_lvs is not None:
4200 self.cluster.reserved_lvs = self.op.reserved_lvs
4202 if self.op.use_external_mip_script is not None:
4203 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4205 def helper_os(aname, mods, desc):
4206 desc += " OS list"
4207 lst = getattr(self.cluster, aname)
4208 for key, val in mods:
4209 if key == constants.DDM_ADD:
4210 if val in lst:
4211 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4212 else:
4213 lst.append(val)
4214 elif key == constants.DDM_REMOVE:
4215 if val in lst:
4216 lst.remove(val)
4217 else:
4218 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4219 else:
4220 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4222 if self.op.hidden_os:
4223 helper_os("hidden_os", self.op.hidden_os, "hidden")
4225 if self.op.blacklisted_os:
4226 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4228 if self.op.master_netdev:
4229 master_params = self.cfg.GetMasterNetworkParameters()
4230 ems = self.cfg.GetUseExternalMipScript()
4231 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4232 self.cluster.master_netdev)
4233 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4234 master_params, ems)
4235 result.Raise("Could not disable the master ip")
4236 feedback_fn("Changing master_netdev from %s to %s" %
4237 (master_params.netdev, self.op.master_netdev))
4238 self.cluster.master_netdev = self.op.master_netdev
4240 if self.op.master_netmask:
4241 master_params = self.cfg.GetMasterNetworkParameters()
4242 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4243 result = self.rpc.call_node_change_master_netmask(master_params.name,
4244 master_params.netmask,
4245 self.op.master_netmask,
4246 master_params.ip,
4247 master_params.netdev)
4248 if result.fail_msg:
4249 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4250 feedback_fn(msg)
4252 self.cluster.master_netmask = self.op.master_netmask
4254 self.cfg.Update(self.cluster, feedback_fn)
4256 if self.op.master_netdev:
4257 master_params = self.cfg.GetMasterNetworkParameters()
4258 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4259 self.op.master_netdev)
4260 ems = self.cfg.GetUseExternalMipScript()
4261 result = self.rpc.call_node_activate_master_ip(master_params.name,
4262 master_params, ems)
4263 if result.fail_msg:
4264 self.LogWarning("Could not re-enable the master ip on"
4265 " the master, please restart manually: %s",
4266 result.fail_msg)
4269 def _UploadHelper(lu, nodes, fname):
4270 """Helper for uploading a file and showing warnings.
4273 if os.path.exists(fname):
4274 result = lu.rpc.call_upload_file(nodes, fname)
4275 for to_node, to_result in result.items():
4276 msg = to_result.fail_msg
4277 if msg:
4278 msg = ("Copy of file %s to node %s failed: %s" %
4279 (fname, to_node, msg))
4280 lu.proc.LogWarning(msg)
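# Illustrative sketch (editor's addition): _UploadHelper is deliberately
# best-effort; per-node failures are only logged as warnings instead of
# aborting the operation. Typical use, with a hypothetical node list:
#
#   node_list = lu.cfg.GetOnlineNodeList()
#   _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)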
4283 def _ComputeAncillaryFiles(cluster, redist):
4284 """Compute files external to Ganeti which need to be consistent.
4286 @type redist: boolean
4287 @param redist: Whether to include files which need to be redistributed
4290 # Compute files for all nodes
4291 files_all = set([
4292 constants.SSH_KNOWN_HOSTS_FILE,
4293 constants.CONFD_HMAC_KEY,
4294 constants.CLUSTER_DOMAIN_SECRET_FILE,
4295 constants.SPICE_CERT_FILE,
4296 constants.SPICE_CACERT_FILE,
4297 constants.RAPI_USERS_FILE,
4298 ])
4300 if not redist:
4301 files_all.update(constants.ALL_CERT_FILES)
4302 files_all.update(ssconf.SimpleStore().GetFileList())
4303 else:
4304 # we need to ship at least the RAPI certificate
4305 files_all.add(constants.RAPI_CERT_FILE)
4307 if cluster.modify_etc_hosts:
4308 files_all.add(constants.ETC_HOSTS)
4310 # Files which are optional, these must:
4311 # - be present in one other category as well
4312 # - either exist or not exist on all nodes of that category (mc, vm all)
4313 files_opt = set([
4314 constants.RAPI_USERS_FILE,
4315 ])
4317 # Files which should only be on master candidates
4318 files_mc = set()
4320 if not redist:
4321 files_mc.add(constants.CLUSTER_CONF_FILE)
4323 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4324 # replication
4325 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4327 # Files which should only be on VM-capable nodes
4328 files_vm = set(filename
4329 for hv_name in cluster.enabled_hypervisors
4330 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4332 files_opt |= set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4336 # Filenames in each category must be unique
4337 all_files_set = files_all | files_mc | files_vm
4338 assert (len(all_files_set) ==
4339 sum(map(len, [files_all, files_mc, files_vm]))), \
4340 "Found file listed in more than one file list"
4342 # Optional files must be present in one other category
4343 assert all_files_set.issuperset(files_opt), \
4344 "Optional file not in a different required list"
4346 return (files_all, files_opt, files_mc, files_vm)
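# Illustrative sketch (editor's addition): the four sets returned above are
# disjoint apart from the optional files, which must also appear in one of
# the other categories. A caller interested only in what every node must
# carry could do, roughly:
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   required_everywhere = files_all - files_opt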
4349 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4350 """Distribute additional files which are part of the cluster configuration.
4352 ConfigWriter takes care of distributing the config and ssconf files, but
4353 there are more files which should be distributed to all nodes. This function
4354 makes sure those are copied.
4356 @param lu: calling logical unit
4357 @param additional_nodes: list of nodes not in the config to distribute to
4358 @type additional_vm: boolean
4359 @param additional_vm: whether the additional nodes are vm-capable or not
4362 # Gather target nodes
4363 cluster = lu.cfg.GetClusterInfo()
4364 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4366 online_nodes = lu.cfg.GetOnlineNodeList()
4367 vm_nodes = lu.cfg.GetVmCapableNodeList()
4369 if additional_nodes is not None:
4370 online_nodes.extend(additional_nodes)
4371 if additional_vm:
4372 vm_nodes.extend(additional_nodes)
4374 # Never distribute to master node
4375 for nodelist in [online_nodes, vm_nodes]:
4376 if master_info.name in nodelist:
4377 nodelist.remove(master_info.name)
4380 (files_all, _, files_mc, files_vm) = \
4381 _ComputeAncillaryFiles(cluster, True)
4383 # Never re-distribute configuration file from here
4384 assert not (constants.CLUSTER_CONF_FILE in files_all or
4385 constants.CLUSTER_CONF_FILE in files_vm)
4386 assert not files_mc, "Master candidates not handled in this function"
4388 filemap = [
4389 (online_nodes, files_all),
4390 (vm_nodes, files_vm),
4391 ]
4393 # Upload the files
4394 for (node_list, files) in filemap:
4395 for fname in files:
4396 _UploadHelper(lu, node_list, fname)
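# Illustrative sketch (editor's addition): when a node is added, the function
# above is called with the not-yet-configured node passed explicitly, since
# that node is not in the configuration at this point (see LUNodeAdd.Exec
# further down):
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)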
4399 class LUClusterRedistConf(NoHooksLU):
4400 """Force the redistribution of cluster configuration.
4402 This is a very simple LU.
4407 def ExpandNames(self):
4408 self.needed_locks = {
4409 locking.LEVEL_NODE: locking.ALL_SET,
4411 self.share_locks[locking.LEVEL_NODE] = 1
4413 def Exec(self, feedback_fn):
4414 """Redistribute the configuration.
4417 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4418 _RedistributeAncillaryFiles(self)
4421 class LUClusterActivateMasterIp(NoHooksLU):
4422 """Activate the master IP on the master node.
4425 def Exec(self, feedback_fn):
4426 """Activate the master IP.
4429 master_params = self.cfg.GetMasterNetworkParameters()
4430 ems = self.cfg.GetUseExternalMipScript()
4431 result = self.rpc.call_node_activate_master_ip(master_params.name,
4433 result.Raise("Could not activate the master IP")
4436 class LUClusterDeactivateMasterIp(NoHooksLU):
4437 """Deactivate the master IP on the master node.
4440 def Exec(self, feedback_fn):
4441 """Deactivate the master IP.
4444 master_params = self.cfg.GetMasterNetworkParameters()
4445 ems = self.cfg.GetUseExternalMipScript()
4446 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4448 result.Raise("Could not deactivate the master IP")
4451 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4452 """Sleep and poll for an instance's disk to sync.
4455 if not instance.disks or disks is not None and not disks:
4456 return True
4458 disks = _ExpandCheckDisks(instance, disks)
4461 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4463 node = instance.primary_node
4465 for dev in disks:
4466 lu.cfg.SetDiskID(dev, node)
4468 # TODO: Convert to utils.Retry
4470 retries = 0
4471 degr_retries = 10 # in seconds, as we sleep 1 second each time
4472 while True:
4473 max_time = 0
4474 done = True
4475 cumul_degraded = False
4476 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4477 msg = rstats.fail_msg
4478 if msg:
4479 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4480 retries += 1
4481 if retries >= 10:
4482 raise errors.RemoteError("Can't contact node %s for mirror data,"
4483 " aborting." % node)
4484 time.sleep(6)
4485 continue
4486 rstats = rstats.payload
4488 for i, mstat in enumerate(rstats):
4489 if mstat is None:
4490 lu.LogWarning("Can't compute data for node %s/%s",
4491 node, disks[i].iv_name)
4492 continue
4494 cumul_degraded = (cumul_degraded or
4495 (mstat.is_degraded and mstat.sync_percent is None))
4496 if mstat.sync_percent is not None:
4497 done = False
4498 if mstat.estimated_time is not None:
4499 rem_time = ("%s remaining (estimated)" %
4500 utils.FormatSeconds(mstat.estimated_time))
4501 max_time = mstat.estimated_time
4502 else:
4503 rem_time = "no time estimate"
4504 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4505 (disks[i].iv_name, mstat.sync_percent, rem_time))
4507 # if we're done but degraded, let's do a few small retries, to
4508 # make sure we see a stable and not transient situation; therefore
4509 # we force restart of the loop
4510 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4511 logging.info("Degraded disks found, %d retries left", degr_retries)
4512 degr_retries -= 1
4513 time.sleep(1)
4514 continue
4516 if done or oneshot:
4517 break
4519 time.sleep(min(60, max_time))
4521 if done:
4522 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4523 return not cumul_degraded
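# Illustrative sketch (editor's addition): callers treat the boolean result
# as "all mirrors healthy"; a False return usually makes the calling LU
# abort or clean up. Hypothetical usage for a freshly created instance:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Disk sync failed or devices are degraded")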
4526 def _BlockdevFind(lu, node, dev, instance):
4527 """Wrapper around call_blockdev_find to annotate diskparams.
4529 @param lu: A reference to the lu object
4530 @param node: The node to call out
4531 @param dev: The device to find
4532 @param instance: The instance object the device belongs to
4533 @returns The result of the rpc call
4536 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537 return lu.rpc.call_blockdev_find(node, disk)
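# Illustrative sketch (editor's addition): _BlockdevFind exists so that the
# disk object is annotated with the applicable disk parameters before the
# RPC; calling call_blockdev_find directly on an un-annotated disk would
# lose the per-group/cluster disk parameters. Hypothetical usage:
#
#   result = _BlockdevFind(self, node, dev, instance)
#   if not result.fail_msg and result.payload:
#     status = result.payload   # block device status information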
4540 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4541 """Wrapper around L{_CheckDiskConsistencyInner}.
4544 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4545 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4549 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4551 """Check that mirrors are not degraded.
4553 @attention: The device has to be annotated already.
4555 The ldisk parameter, if True, will change the test from the
4556 is_degraded attribute (which represents overall non-ok status for
4557 the device(s)) to the ldisk (representing the local storage status).
4560 lu.cfg.SetDiskID(dev, node)
4562 result = True
4564 if on_primary or dev.AssembleOnSecondary():
4565 rstats = lu.rpc.call_blockdev_find(node, dev)
4566 msg = rstats.fail_msg
4567 if msg:
4568 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4569 result = False
4570 elif not rstats.payload:
4571 lu.LogWarning("Can't find disk on node %s", node)
4572 result = False
4573 else:
4574 if ldisk:
4575 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4577 else:
4578 result = result and not rstats.payload.is_degraded
4580 if dev.children:
4581 for child in dev.children:
4582 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4583 on_primary)
4585 return result
4587 class LUOobCommand(NoHooksLU):
4588 """Logical unit for OOB handling.
4592 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4594 def ExpandNames(self):
4595 """Gather locks we need.
4598 if self.op.node_names:
4599 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4600 lock_names = self.op.node_names
4602 lock_names = locking.ALL_SET
4604 self.needed_locks = {
4605 locking.LEVEL_NODE: lock_names,
4608 def CheckPrereq(self):
4609 """Check prerequisites.
4612 - the node exists in the configuration
4615 Any errors are signaled by raising errors.OpPrereqError.
4619 self.master_node = self.cfg.GetMasterNode()
4621 assert self.op.power_delay >= 0.0
4623 if self.op.node_names:
4624 if (self.op.command in self._SKIP_MASTER and
4625 self.master_node in self.op.node_names):
4626 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4627 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4629 if master_oob_handler:
4630 additional_text = ("run '%s %s %s' if you want to operate on the"
4631 " master regardless") % (master_oob_handler,
4635 additional_text = "it does not support out-of-band operations"
4637 raise errors.OpPrereqError(("Operating on the master node %s is not"
4638 " allowed for %s; %s") %
4639 (self.master_node, self.op.command,
4640 additional_text), errors.ECODE_INVAL)
4641 else:
4642 self.op.node_names = self.cfg.GetNodeList()
4643 if self.op.command in self._SKIP_MASTER:
4644 self.op.node_names.remove(self.master_node)
4646 if self.op.command in self._SKIP_MASTER:
4647 assert self.master_node not in self.op.node_names
4649 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4650 if node is None:
4651 raise errors.OpPrereqError("Node %s not found" % node_name,
4652 errors.ECODE_NOENT)
4653 else:
4654 self.nodes.append(node)
4656 if (not self.op.ignore_status and
4657 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4658 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4659 " not marked offline") % node_name,
4662 def Exec(self, feedback_fn):
4663 """Execute OOB and return result if we expect any.
4666 master_node = self.master_node
4667 ret = []
4669 for idx, node in enumerate(utils.NiceSort(self.nodes,
4670 key=lambda node: node.name)):
4671 node_entry = [(constants.RS_NORMAL, node.name)]
4672 ret.append(node_entry)
4674 oob_program = _SupportsOob(self.cfg, node)
4676 if not oob_program:
4677 node_entry.append((constants.RS_UNAVAIL, None))
4678 continue
4680 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4681 self.op.command, oob_program, node.name)
4682 result = self.rpc.call_run_oob(master_node, oob_program,
4683 self.op.command, node.name,
4687 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4688 node.name, result.fail_msg)
4689 node_entry.append((constants.RS_NODATA, None))
4692 self._CheckPayload(result)
4693 except errors.OpExecError, err:
4694 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4696 node_entry.append((constants.RS_NODATA, None))
4698 if self.op.command == constants.OOB_HEALTH:
4699 # For health we should log important events
4700 for item, status in result.payload:
4701 if status in [constants.OOB_STATUS_WARNING,
4702 constants.OOB_STATUS_CRITICAL]:
4703 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4704 item, node.name, status)
4706 if self.op.command == constants.OOB_POWER_ON:
4707 node.powered = True
4708 elif self.op.command == constants.OOB_POWER_OFF:
4709 node.powered = False
4710 elif self.op.command == constants.OOB_POWER_STATUS:
4711 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4712 if powered != node.powered:
4713 logging.warning(("Recorded power state (%s) of node '%s' does not"
4714 " match actual power state (%s)"), node.powered,
4717 # For configuration changing commands we should update the node
4718 if self.op.command in (constants.OOB_POWER_ON,
4719 constants.OOB_POWER_OFF):
4720 self.cfg.Update(node, feedback_fn)
4722 node_entry.append((constants.RS_NORMAL, result.payload))
4724 if (self.op.command == constants.OOB_POWER_ON and
4725 idx < len(self.nodes) - 1):
4726 time.sleep(self.op.power_delay)
4728 return ret
4730 def _CheckPayload(self, result):
4731 """Checks if the payload is valid.
4733 @param result: RPC result
4734 @raises errors.OpExecError: If payload is not valid
4737 errs = []
4738 if self.op.command == constants.OOB_HEALTH:
4739 if not isinstance(result.payload, list):
4740 errs.append("command 'health' is expected to return a list but got %s" %
4741 type(result.payload))
4743 for item, status in result.payload:
4744 if status not in constants.OOB_STATUSES:
4745 errs.append("health item '%s' has invalid status '%s'" %
4748 if self.op.command == constants.OOB_POWER_STATUS:
4749 if not isinstance(result.payload, dict):
4750 errs.append("power-status is expected to return a dict but got %s" %
4751 type(result.payload))
4753 if self.op.command in [
4754 constants.OOB_POWER_ON,
4755 constants.OOB_POWER_OFF,
4756 constants.OOB_POWER_CYCLE,
4758 if result.payload is not None:
4759 errs.append("%s is expected to not return payload but got '%s'" %
4760 (self.op.command, result.payload))
4762 if errs:
4763 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4764 utils.CommaJoin(errs))
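# Illustrative sketch (editor's addition): the expected payload shape
# depends on the OOB command; the values below are made up:
#
#   "health"       -> [["disk0", "OK"], ["psu1", "CRITICAL"]]
#   "power-status" -> {constants.OOB_POWER_STATUS_POWERED: True}
#   "power-on"/"power-off"/"power-cycle" -> None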
4767 class _OsQuery(_QueryBase):
4768 FIELDS = query.OS_FIELDS
4770 def ExpandNames(self, lu):
4771 # Lock all nodes in shared mode
4772 # Temporary removal of locks, should be reverted later
4773 # TODO: reintroduce locks when they are lighter-weight
4774 lu.needed_locks = {}
4775 #self.share_locks[locking.LEVEL_NODE] = 1
4776 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4778 # The following variables interact with _QueryBase._GetNames
4780 self.wanted = self.names
4782 self.wanted = locking.ALL_SET
4784 self.do_locking = self.use_locking
4786 def DeclareLocks(self, lu, level):
4789 @staticmethod
4790 def _DiagnoseByOS(rlist):
4791 """Remaps a per-node return list into an a per-os per-node dictionary
4793 @param rlist: a map with node names as keys and OS objects as values
4796 @return: a dictionary with osnames as keys and as value another
4797 map, with nodes as keys and tuples of (path, status, diagnose,
4798 variants, parameters, api_versions) as values, eg::
4800 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4801 (/srv/..., False, "invalid api")],
4802 "node2": [(/srv/..., True, "", [], [])]}
4806 all_os = {}
4807 # we build here the list of nodes that didn't fail the RPC (at RPC
4808 # level), so that nodes with a non-responding node daemon don't
4809 # make all OSes invalid
4810 good_nodes = [node_name for node_name in rlist
4811 if not rlist[node_name].fail_msg]
4812 for node_name, nr in rlist.items():
4813 if nr.fail_msg or not nr.payload:
4814 continue
4815 for (name, path, status, diagnose, variants,
4816 params, api_versions) in nr.payload:
4817 if name not in all_os:
4818 # build a list of nodes for this os containing empty lists
4819 # for each node in node_list
4820 all_os[name] = {}
4821 for nname in good_nodes:
4822 all_os[name][nname] = []
4823 # convert params from [name, help] to (name, help)
4824 params = [tuple(v) for v in params]
4825 all_os[name][node_name].append((path, status, diagnose,
4826 variants, params, api_versions))
4828 return all_os
4829 def _GetQueryData(self, lu):
4830 """Computes the list of nodes and their attributes.
4833 # Locking is not used
4834 assert not (compat.any(lu.glm.is_owned(level)
4835 for level in locking.LEVELS
4836 if level != locking.LEVEL_CLUSTER) or
4837 self.do_locking or self.use_locking)
4839 valid_nodes = [node.name
4840 for node in lu.cfg.GetAllNodesInfo().values()
4841 if not node.offline and node.vm_capable]
4842 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4843 cluster = lu.cfg.GetClusterInfo()
4847 for (os_name, os_data) in pol.items():
4848 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4849 hidden=(os_name in cluster.hidden_os),
4850 blacklisted=(os_name in cluster.blacklisted_os))
4854 api_versions = set()
4856 for idx, osl in enumerate(os_data.values()):
4857 info.valid = bool(info.valid and osl and osl[0][1])
4861 (node_variants, node_params, node_api) = osl[0][3:6]
4864 variants.update(node_variants)
4865 parameters.update(node_params)
4866 api_versions.update(node_api)
4868 # Filter out inconsistent values
4869 variants.intersection_update(node_variants)
4870 parameters.intersection_update(node_params)
4871 api_versions.intersection_update(node_api)
4873 info.variants = list(variants)
4874 info.parameters = list(parameters)
4875 info.api_versions = list(api_versions)
4877 data[os_name] = info
4879 # Prepare data in requested order
4880 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4884 class LUOsDiagnose(NoHooksLU):
4885 """Logical unit for OS diagnose/query.
4890 @staticmethod
4891 def _BuildFilter(fields, names):
4892 """Builds a filter for querying OSes.
4895 name_filter = qlang.MakeSimpleFilter("name", names)
4897 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4898 # respective field is not requested
4899 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4900 for fname in ["hidden", "blacklisted"]
4901 if fname not in fields]
4902 if "valid" not in fields:
4903 status_filter.append([qlang.OP_TRUE, "valid"])
4906 status_filter.insert(0, qlang.OP_AND)
4908 status_filter = None
4910 if name_filter and status_filter:
4911 return [qlang.OP_AND, name_filter, status_filter]
4915 return status_filter
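# Illustrative sketch (editor's addition): with the default field set the
# generated filter hides hidden, blacklisted and invalid OSes. A query for
# the made-up name "debian-8" becomes roughly:
#
#   [OP_AND, [OP_OR, [OP_EQUAL, "name", "debian-8"]],
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]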
4917 def CheckArguments(self):
4918 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4919 self.op.output_fields, False)
4921 def ExpandNames(self):
4922 self.oq.ExpandNames(self)
4924 def Exec(self, feedback_fn):
4925 return self.oq.OldStyleQuery(self)
4928 class LUNodeRemove(LogicalUnit):
4929 """Logical unit for removing a node.
4932 HPATH = "node-remove"
4933 HTYPE = constants.HTYPE_NODE
4935 def BuildHooksEnv(self):
4940 "OP_TARGET": self.op.node_name,
4941 "NODE_NAME": self.op.node_name,
4944 def BuildHooksNodes(self):
4945 """Build hooks nodes.
4947 This doesn't run on the target node in the pre phase as a failed
4948 node would then be impossible to remove.
4951 all_nodes = self.cfg.GetNodeList()
4953 all_nodes.remove(self.op.node_name)
4956 return (all_nodes, all_nodes)
4958 def CheckPrereq(self):
4959 """Check prerequisites.
4962 - the node exists in the configuration
4963 - it does not have primary or secondary instances
4964 - it's not the master
4966 Any errors are signaled by raising errors.OpPrereqError.
4969 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4970 node = self.cfg.GetNodeInfo(self.op.node_name)
4971 assert node is not None
4973 masternode = self.cfg.GetMasterNode()
4974 if node.name == masternode:
4975 raise errors.OpPrereqError("Node is the master node, failover to another"
4976 " node is required", errors.ECODE_INVAL)
4978 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4979 if node.name in instance.all_nodes:
4980 raise errors.OpPrereqError("Instance %s is still running on the node,"
4981 " please remove first" % instance_name,
4983 self.op.node_name = node.name
4986 def Exec(self, feedback_fn):
4987 """Removes the node from the cluster.
4991 logging.info("Stopping the node daemon and removing configs from node %s",
4994 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4996 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4999 # Promote nodes to master candidate as needed
5000 _AdjustCandidatePool(self, exceptions=[node.name])
5001 self.context.RemoveNode(node.name)
5003 # Run post hooks on the node before it's removed
5004 _RunPostHook(self, node.name)
5006 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5007 msg = result.fail_msg
5009 self.LogWarning("Errors encountered on the remote node while leaving"
5010 " the cluster: %s", msg)
5012 # Remove node from our /etc/hosts
5013 if self.cfg.GetClusterInfo().modify_etc_hosts:
5014 master_node = self.cfg.GetMasterNode()
5015 result = self.rpc.call_etc_hosts_modify(master_node,
5016 constants.ETC_HOSTS_REMOVE,
5018 result.Raise("Can't update hosts file with new host data")
5019 _RedistributeAncillaryFiles(self)
5022 class _NodeQuery(_QueryBase):
5023 FIELDS = query.NODE_FIELDS
5025 def ExpandNames(self, lu):
5026 lu.needed_locks = {}
5027 lu.share_locks = _ShareAll()
5030 self.wanted = _GetWantedNodes(lu, self.names)
5032 self.wanted = locking.ALL_SET
5034 self.do_locking = (self.use_locking and
5035 query.NQ_LIVE in self.requested_data)
5038 # If any non-static field is requested we need to lock the nodes
5039 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5041 def DeclareLocks(self, lu, level):
5044 def _GetQueryData(self, lu):
5045 """Computes the list of nodes and their attributes.
5048 all_info = lu.cfg.GetAllNodesInfo()
5050 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5052 # Gather data as requested
5053 if query.NQ_LIVE in self.requested_data:
5054 # filter out non-vm_capable nodes
5055 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5057 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5058 [lu.cfg.GetHypervisorType()])
5059 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5060 for (name, nresult) in node_data.items()
5061 if not nresult.fail_msg and nresult.payload)
5065 if query.NQ_INST in self.requested_data:
5066 node_to_primary = dict([(name, set()) for name in nodenames])
5067 node_to_secondary = dict([(name, set()) for name in nodenames])
5069 inst_data = lu.cfg.GetAllInstancesInfo()
5071 for inst in inst_data.values():
5072 if inst.primary_node in node_to_primary:
5073 node_to_primary[inst.primary_node].add(inst.name)
5074 for secnode in inst.secondary_nodes:
5075 if secnode in node_to_secondary:
5076 node_to_secondary[secnode].add(inst.name)
5078 node_to_primary = None
5079 node_to_secondary = None
5081 if query.NQ_OOB in self.requested_data:
5082 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5083 for name, node in all_info.iteritems())
5087 if query.NQ_GROUP in self.requested_data:
5088 groups = lu.cfg.GetAllNodeGroupsInfo()
5092 return query.NodeQueryData([all_info[name] for name in nodenames],
5093 live_data, lu.cfg.GetMasterNode(),
5094 node_to_primary, node_to_secondary, groups,
5095 oob_support, lu.cfg.GetClusterInfo())
5098 class LUNodeQuery(NoHooksLU):
5099 """Logical unit for querying nodes.
5102 # pylint: disable=W0142
5105 def CheckArguments(self):
5106 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5107 self.op.output_fields, self.op.use_locking)
5109 def ExpandNames(self):
5110 self.nq.ExpandNames(self)
5112 def DeclareLocks(self, level):
5113 self.nq.DeclareLocks(self, level)
5115 def Exec(self, feedback_fn):
5116 return self.nq.OldStyleQuery(self)
5119 class LUNodeQueryvols(NoHooksLU):
5120 """Logical unit for getting volumes on node(s).
5124 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5125 _FIELDS_STATIC = utils.FieldSet("node")
5127 def CheckArguments(self):
5128 _CheckOutputFields(static=self._FIELDS_STATIC,
5129 dynamic=self._FIELDS_DYNAMIC,
5130 selected=self.op.output_fields)
5132 def ExpandNames(self):
5133 self.share_locks = _ShareAll()
5134 self.needed_locks = {}
5136 if not self.op.nodes:
5137 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5139 self.needed_locks[locking.LEVEL_NODE] = \
5140 _GetWantedNodes(self, self.op.nodes)
5142 def Exec(self, feedback_fn):
5143 """Computes the list of nodes and their attributes.
5146 nodenames = self.owned_locks(locking.LEVEL_NODE)
5147 volumes = self.rpc.call_node_volumes(nodenames)
5149 ilist = self.cfg.GetAllInstancesInfo()
5150 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5152 output = []
5153 for node in nodenames:
5154 nresult = volumes[node]
5155 if nresult.offline:
5156 continue
5157 msg = nresult.fail_msg
5158 if msg:
5159 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5160 continue
5162 node_vols = sorted(nresult.payload,
5163 key=operator.itemgetter("dev"))
5165 for vol in node_vols:
5166 node_output = []
5167 for field in self.op.output_fields:
5168 if field == "node":
5169 val = node
5170 elif field == "phys":
5171 val = vol["dev"]
5172 elif field == "vg":
5173 val = vol["vg"]
5174 elif field == "name":
5175 val = vol["name"]
5176 elif field == "size":
5177 val = int(float(vol["size"]))
5178 elif field == "instance":
5179 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5180 else:
5181 raise errors.ParameterError(field)
5182 node_output.append(str(val))
5184 output.append(node_output)
5186 return output
5189 class LUNodeQueryStorage(NoHooksLU):
5190 """Logical unit for getting information on storage units on node(s).
5193 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5196 def CheckArguments(self):
5197 _CheckOutputFields(static=self._FIELDS_STATIC,
5198 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5199 selected=self.op.output_fields)
5201 def ExpandNames(self):
5202 self.share_locks = _ShareAll()
5203 self.needed_locks = {}
5206 self.needed_locks[locking.LEVEL_NODE] = \
5207 _GetWantedNodes(self, self.op.nodes)
5209 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5211 def Exec(self, feedback_fn):
5212 """Computes the list of nodes and their attributes.
5215 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5217 # Always get name to sort by
5218 if constants.SF_NAME in self.op.output_fields:
5219 fields = self.op.output_fields[:]
5221 fields = [constants.SF_NAME] + self.op.output_fields
5223 # Never ask for node or type as it's only known to the LU
5224 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5225 while extra in fields:
5226 fields.remove(extra)
5228 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5229 name_idx = field_idx[constants.SF_NAME]
5231 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5232 data = self.rpc.call_storage_list(self.nodes,
5233 self.op.storage_type, st_args,
5234 self.op.name, fields)
5238 for node in utils.NiceSort(self.nodes):
5239 nresult = data[node]
5243 msg = nresult.fail_msg
5245 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5248 rows = dict([(row[name_idx], row) for row in nresult.payload])
5250 for name in utils.NiceSort(rows.keys()):
5255 for field in self.op.output_fields:
5256 if field == constants.SF_NODE:
5258 elif field == constants.SF_TYPE:
5259 val = self.op.storage_type
5260 elif field in field_idx:
5261 val = row[field_idx[field]]
5263 raise errors.ParameterError(field)
5272 class _InstanceQuery(_QueryBase):
5273 FIELDS = query.INSTANCE_FIELDS
5275 def ExpandNames(self, lu):
5276 lu.needed_locks = {}
5277 lu.share_locks = _ShareAll()
5280 self.wanted = _GetWantedInstances(lu, self.names)
5282 self.wanted = locking.ALL_SET
5284 self.do_locking = (self.use_locking and
5285 query.IQ_LIVE in self.requested_data)
5287 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5288 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5289 lu.needed_locks[locking.LEVEL_NODE] = []
5290 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5292 self.do_grouplocks = (self.do_locking and
5293 query.IQ_NODES in self.requested_data)
5295 def DeclareLocks(self, lu, level):
5297 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5298 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5300 # Lock all groups used by instances optimistically; this requires going
5301 # via the node before it's locked, requiring verification later on
5302 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5304 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5305 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5306 elif level == locking.LEVEL_NODE:
5307 lu._LockInstancesNodes() # pylint: disable=W0212
5309 @staticmethod
5310 def _CheckGroupLocks(lu):
5311 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5312 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5314 # Check if node groups for locked instances are still correct
5315 for instance_name in owned_instances:
5316 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5318 def _GetQueryData(self, lu):
5319 """Computes the list of instances and their attributes.
5322 if self.do_grouplocks:
5323 self._CheckGroupLocks(lu)
5325 cluster = lu.cfg.GetClusterInfo()
5326 all_info = lu.cfg.GetAllInstancesInfo()
5328 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5330 instance_list = [all_info[name] for name in instance_names]
5331 nodes = frozenset(itertools.chain(*(inst.all_nodes
5332 for inst in instance_list)))
5333 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5336 wrongnode_inst = set()
5338 # Gather data as requested
5339 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5341 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5343 result = node_data[name]
5345 # offline nodes will be in both lists
5346 assert result.fail_msg
5347 offline_nodes.append(name)
5349 bad_nodes.append(name)
5350 elif result.payload:
5351 for inst in result.payload:
5352 if inst in all_info:
5353 if all_info[inst].primary_node == name:
5354 live_data.update(result.payload)
5356 wrongnode_inst.add(inst)
5358 # orphan instance; we don't list it here as we don't
5359 # handle this case yet in the output of instance listing
5360 logging.warning("Orphan instance '%s' found on node %s",
5362 # else no instance is alive
5366 if query.IQ_DISKUSAGE in self.requested_data:
5367 disk_usage = dict((inst.name,
5368 _ComputeDiskSize(inst.disk_template,
5369 [{constants.IDISK_SIZE: disk.size}
5370 for disk in inst.disks]))
5371 for inst in instance_list)
5375 if query.IQ_CONSOLE in self.requested_data:
5377 for inst in instance_list:
5378 if inst.name in live_data:
5379 # Instance is running
5380 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5382 consinfo[inst.name] = None
5383 assert set(consinfo.keys()) == set(instance_names)
5387 if query.IQ_NODES in self.requested_data:
5388 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5390 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5391 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5392 for uuid in set(map(operator.attrgetter("group"),
5398 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5399 disk_usage, offline_nodes, bad_nodes,
5400 live_data, wrongnode_inst, consinfo,
5404 class LUQuery(NoHooksLU):
5405 """Query for resources/items of a certain kind.
5408 # pylint: disable=W0142
5411 def CheckArguments(self):
5412 qcls = _GetQueryImplementation(self.op.what)
5414 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5416 def ExpandNames(self):
5417 self.impl.ExpandNames(self)
5419 def DeclareLocks(self, level):
5420 self.impl.DeclareLocks(self, level)
5422 def Exec(self, feedback_fn):
5423 return self.impl.NewStyleQuery(self)
5426 class LUQueryFields(NoHooksLU):
5427 """Query for resources/items of a certain kind.
5430 # pylint: disable=W0142
5433 def CheckArguments(self):
5434 self.qcls = _GetQueryImplementation(self.op.what)
5436 def ExpandNames(self):
5437 self.needed_locks = {}
5439 def Exec(self, feedback_fn):
5440 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5443 class LUNodeModifyStorage(NoHooksLU):
5444 """Logical unit for modifying a storage volume on a node.
5449 def CheckArguments(self):
5450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5452 storage_type = self.op.storage_type
5454 try:
5455 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5456 except KeyError:
5457 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5458 " modified" % storage_type,
5461 diff = set(self.op.changes.keys()) - modifiable
5462 if diff:
5463 raise errors.OpPrereqError("The following fields can not be modified for"
5464 " storage units of type '%s': %r" %
5465 (storage_type, list(diff)),
5468 def ExpandNames(self):
5469 self.needed_locks = {
5470 locking.LEVEL_NODE: self.op.node_name,
5473 def Exec(self, feedback_fn):
5474 """Computes the list of nodes and their attributes.
5477 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5478 result = self.rpc.call_storage_modify(self.op.node_name,
5479 self.op.storage_type, st_args,
5480 self.op.name, self.op.changes)
5481 result.Raise("Failed to modify storage unit '%s' on %s" %
5482 (self.op.name, self.op.node_name))
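# Illustrative sketch (editor's addition): only fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type may be
# changed; for LVM physical volumes that is typically the "allocatable"
# flag. Hypothetical opcode submitted by a client:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})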
5485 class LUNodeAdd(LogicalUnit):
5486 """Logical unit for adding node to the cluster.
5490 HTYPE = constants.HTYPE_NODE
5491 _NFLAGS = ["master_capable", "vm_capable"]
5493 def CheckArguments(self):
5494 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5495 # validate/normalize the node name
5496 self.hostname = netutils.GetHostname(name=self.op.node_name,
5497 family=self.primary_ip_family)
5498 self.op.node_name = self.hostname.name
5500 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5501 raise errors.OpPrereqError("Cannot readd the master node",
5504 if self.op.readd and self.op.group:
5505 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5506 " being readded", errors.ECODE_INVAL)
5508 def BuildHooksEnv(self):
5511 This will run on all nodes before, and on all nodes + the new node after.
5515 "OP_TARGET": self.op.node_name,
5516 "NODE_NAME": self.op.node_name,
5517 "NODE_PIP": self.op.primary_ip,
5518 "NODE_SIP": self.op.secondary_ip,
5519 "MASTER_CAPABLE": str(self.op.master_capable),
5520 "VM_CAPABLE": str(self.op.vm_capable),
5523 def BuildHooksNodes(self):
5524 """Build hooks nodes.
5527 # Exclude added node
5528 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5529 post_nodes = pre_nodes + [self.op.node_name, ]
5531 return (pre_nodes, post_nodes)
5533 def CheckPrereq(self):
5534 """Check prerequisites.
5537 - the new node is not already in the config
5539 - its parameters (single/dual homed) matches the cluster
5541 Any errors are signaled by raising errors.OpPrereqError.
5545 hostname = self.hostname
5546 node = hostname.name
5547 primary_ip = self.op.primary_ip = hostname.ip
5548 if self.op.secondary_ip is None:
5549 if self.primary_ip_family == netutils.IP6Address.family:
5550 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5551 " IPv4 address must be given as secondary",
5553 self.op.secondary_ip = primary_ip
5555 secondary_ip = self.op.secondary_ip
5556 if not netutils.IP4Address.IsValid(secondary_ip):
5557 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5558 " address" % secondary_ip, errors.ECODE_INVAL)
5560 node_list = cfg.GetNodeList()
5561 if not self.op.readd and node in node_list:
5562 raise errors.OpPrereqError("Node %s is already in the configuration" %
5563 node, errors.ECODE_EXISTS)
5564 elif self.op.readd and node not in node_list:
5565 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5568 self.changed_primary_ip = False
5570 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5571 if self.op.readd and node == existing_node_name:
5572 if existing_node.secondary_ip != secondary_ip:
5573 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5574 " address configuration as before",
5576 if existing_node.primary_ip != primary_ip:
5577 self.changed_primary_ip = True
5581 if (existing_node.primary_ip == primary_ip or
5582 existing_node.secondary_ip == primary_ip or
5583 existing_node.primary_ip == secondary_ip or
5584 existing_node.secondary_ip == secondary_ip):
5585 raise errors.OpPrereqError("New node ip address(es) conflict with"
5586 " existing node %s" % existing_node.name,
5587 errors.ECODE_NOTUNIQUE)
5589 # After this 'if' block, None is no longer a valid value for the
5590 # _capable op attributes
5592 old_node = self.cfg.GetNodeInfo(node)
5593 assert old_node is not None, "Can't retrieve locked node %s" % node
5594 for attr in self._NFLAGS:
5595 if getattr(self.op, attr) is None:
5596 setattr(self.op, attr, getattr(old_node, attr))
5598 for attr in self._NFLAGS:
5599 if getattr(self.op, attr) is None:
5600 setattr(self.op, attr, True)
5602 if self.op.readd and not self.op.vm_capable:
5603 pri, sec = cfg.GetNodeInstances(node)
5604 if pri or sec:
5605 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5606 " flag set to false, but it already holds"
5607 " instances" % node,
5610 # check that the type of the node (single versus dual homed) is the
5611 # same as for the master
5612 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5613 master_singlehomed = myself.secondary_ip == myself.primary_ip
5614 newbie_singlehomed = secondary_ip == primary_ip
5615 if master_singlehomed != newbie_singlehomed:
5616 if master_singlehomed:
5617 raise errors.OpPrereqError("The master has no secondary ip but the"
5618 " new node has one",
5621 raise errors.OpPrereqError("The master has a secondary ip but the"
5622 " new node doesn't have one",
5625 # checks reachability
5626 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5627 raise errors.OpPrereqError("Node not reachable by ping",
5628 errors.ECODE_ENVIRON)
5630 if not newbie_singlehomed:
5631 # check reachability from my secondary ip to newbie's secondary ip
5632 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5633 source=myself.secondary_ip):
5634 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5635 " based ping to node daemon port",
5636 errors.ECODE_ENVIRON)
5643 if self.op.master_capable:
5644 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5645 else:
5646 self.master_candidate = False
5649 self.new_node = old_node
5651 node_group = cfg.LookupNodeGroup(self.op.group)
5652 self.new_node = objects.Node(name=node,
5653 primary_ip=primary_ip,
5654 secondary_ip=secondary_ip,
5655 master_candidate=self.master_candidate,
5656 offline=False, drained=False,
5659 if self.op.ndparams:
5660 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5662 if self.op.hv_state:
5663 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5665 if self.op.disk_state:
5666 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5668 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5669 # it a property on the base class.
5670 result = rpc.DnsOnlyRunner().call_version([node])[node]
5671 result.Raise("Can't get version information from node %s" % node)
5672 if constants.PROTOCOL_VERSION == result.payload:
5673 logging.info("Communication to node %s fine, sw version %s match",
5674 node, result.payload)
5676 raise errors.OpPrereqError("Version mismatch master version %s,"
5677 " node version %s" %
5678 (constants.PROTOCOL_VERSION, result.payload),
5679 errors.ECODE_ENVIRON)
5681 def Exec(self, feedback_fn):
5682 """Adds the new node to the cluster.
5685 new_node = self.new_node
5686 node = new_node.name
5688 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5692 # We are adding a new node, so we assume it's powered
5692 new_node.powered = True
5694 # for re-adds, reset the offline/drained/master-candidate flags;
5695 # we need to reset here, otherwise offline would prevent RPC calls
5696 # later in the procedure; this also means that if the re-add
5697 # fails, we are left with a non-offlined, broken node
5699 new_node.drained = new_node.offline = False # pylint: disable=W0201
5700 self.LogInfo("Readding a node, the offline/drained flags were reset")
5701 # if we demote the node, we do cleanup later in the procedure
5702 new_node.master_candidate = self.master_candidate
5703 if self.changed_primary_ip:
5704 new_node.primary_ip = self.op.primary_ip
5706 # copy the master/vm_capable flags
5707 for attr in self._NFLAGS:
5708 setattr(new_node, attr, getattr(self.op, attr))
5710 # notify the user about any possible mc promotion
5711 if new_node.master_candidate:
5712 self.LogInfo("Node will be a master candidate")
5714 if self.op.ndparams:
5715 new_node.ndparams = self.op.ndparams
5717 new_node.ndparams = {}
5719 if self.op.hv_state:
5720 new_node.hv_state_static = self.new_hv_state
5722 if self.op.disk_state:
5723 new_node.disk_state_static = self.new_disk_state
5725 # Add node to our /etc/hosts, and add key to known_hosts
5726 if self.cfg.GetClusterInfo().modify_etc_hosts:
5727 master_node = self.cfg.GetMasterNode()
5728 result = self.rpc.call_etc_hosts_modify(master_node,
5729 constants.ETC_HOSTS_ADD,
5732 result.Raise("Can't update hosts file with new host data")
5734 if new_node.secondary_ip != new_node.primary_ip:
5735 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5738 node_verify_list = [self.cfg.GetMasterNode()]
5739 node_verify_param = {
5740 constants.NV_NODELIST: ([node], {}),
5741 # TODO: do a node-net-test as well?
5744 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5745 self.cfg.GetClusterName())
5746 for verifier in node_verify_list:
5747 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5748 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5750 for failed in nl_payload:
5751 feedback_fn("ssh/hostname verification failed"
5752 " (checking from %s): %s" %
5753 (verifier, nl_payload[failed]))
5754 raise errors.OpExecError("ssh/hostname verification failed")
5757 _RedistributeAncillaryFiles(self)
5758 self.context.ReaddNode(new_node)
5759 # make sure we redistribute the config
5760 self.cfg.Update(new_node, feedback_fn)
5761 # and make sure the new node will not have old files around
5762 if not new_node.master_candidate:
5763 result = self.rpc.call_node_demote_from_mc(new_node.name)
5764 msg = result.fail_msg
5766 self.LogWarning("Node failed to demote itself from master"
5767 " candidate status: %s" % msg)
5769 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5770 additional_vm=self.op.vm_capable)
5771 self.context.AddNode(new_node, self.proc.GetECId())
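
# Illustrative sketch (not used by this module): the reachability checks in
# LUNodeAdd above go through netutils.TcpPing against the node daemon port.
# The standalone helper below shows the general idea using only the standard
# library; its name, signature and default timeout are assumptions made for
# illustration, not part of the Ganeti API.
import socket


def _example_tcp_ping(target_ip, port, source_ip=None, timeout=5.0):
  """Simplified stand-in for a TcpPing-style reachability probe.

  Tries to open a TCP connection to target_ip:port, optionally binding to
  source_ip first, and reports plain success/failure.

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    if source_ip:
      sock.bind((source_ip, 0))
    sock.connect((target_ip, port))
    return True
  except socket.error:
    return False
  finally:
    sock.close()
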
5774 class LUNodeSetParams(LogicalUnit):
5775 """Modifies the parameters of a node.
5777 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5778 to the node role (as _ROLE_*)
5779 @cvar _R2F: a dictionary from node role to tuples of flags
5780 @cvar _FLAGS: a list of attribute names corresponding to the flags
5783 HPATH = "node-modify"
5784 HTYPE = constants.HTYPE_NODE
5786 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5788 (True, False, False): _ROLE_CANDIDATE,
5789 (False, True, False): _ROLE_DRAINED,
5790 (False, False, True): _ROLE_OFFLINE,
5791 (False, False, False): _ROLE_REGULAR,
5793 _R2F = dict((v, k) for k, v in _F2R.items())
5794 _FLAGS = ["master_candidate", "drained", "offline"]
5796 def CheckArguments(self):
5797 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5798 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5799 self.op.master_capable, self.op.vm_capable,
5800 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5802 if all_mods.count(None) == len(all_mods):
5803 raise errors.OpPrereqError("Please pass at least one modification",
5805 if all_mods.count(True) > 1:
5806 raise errors.OpPrereqError("Can't set the node into more than one"
5807 " state at the same time",
5810 # Boolean value that tells us whether we might be demoting from MC
5811 self.might_demote = (self.op.master_candidate == False or
5812 self.op.offline == True or
5813 self.op.drained == True or
5814 self.op.master_capable == False)
5816 if self.op.secondary_ip:
5817 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5818 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5819 " address" % self.op.secondary_ip,
5822 self.lock_all = self.op.auto_promote and self.might_demote
5823 self.lock_instances = self.op.secondary_ip is not None
5825 def _InstanceFilter(self, instance):
5826 """Filter for getting affected instances.
5829 return (instance.disk_template in constants.DTS_INT_MIRROR and
5830 self.op.node_name in instance.all_nodes)
5832 def ExpandNames(self):
5834 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5836 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5838 # Since modifying a node can have severe effects on currently running
5839 # operations, the resource lock is acquired in at least shared mode
5840 self.needed_locks[locking.LEVEL_NODE_RES] = \
5841 self.needed_locks[locking.LEVEL_NODE]
5843 # Get node resource and instance locks in shared mode; they are not used
5844 # for anything but read-only access
5845 self.share_locks[locking.LEVEL_NODE_RES] = 1
5846 self.share_locks[locking.LEVEL_INSTANCE] = 1
5848 if self.lock_instances:
5849 self.needed_locks[locking.LEVEL_INSTANCE] = \
5850 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5852 def BuildHooksEnv(self):
5855 This runs on the master node.
5859 "OP_TARGET": self.op.node_name,
5860 "MASTER_CANDIDATE": str(self.op.master_candidate),
5861 "OFFLINE": str(self.op.offline),
5862 "DRAINED": str(self.op.drained),
5863 "MASTER_CAPABLE": str(self.op.master_capable),
5864 "VM_CAPABLE": str(self.op.vm_capable),
5867 def BuildHooksNodes(self):
5868 """Build hooks nodes.
5871 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5874 def CheckPrereq(self):
5875 """Check prerequisites.
5877 This only checks the instance list against the existing names.
5880 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5882 if self.lock_instances:
5883 affected_instances = \
5884 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5886 # Verify instance locks
5887 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5888 wanted_instances = frozenset(affected_instances.keys())
5889 if wanted_instances - owned_instances:
5890 raise errors.OpPrereqError("Instances affected by changing node %s's"
5891 " secondary IP address have changed since"
5892 " locks were acquired, wanted '%s', have"
5893 " '%s'; retry the operation" %
5895 utils.CommaJoin(wanted_instances),
5896 utils.CommaJoin(owned_instances)),
5899 affected_instances = None
5901 if (self.op.master_candidate is not None or
5902 self.op.drained is not None or
5903 self.op.offline is not None):
5904 # we can't change the master's node flags
5905 if self.op.node_name == self.cfg.GetMasterNode():
5906 raise errors.OpPrereqError("The master role can be changed"
5907 " only via master-failover",
5910 if self.op.master_candidate and not node.master_capable:
5911 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5912 " it a master candidate" % node.name,
5915 if self.op.vm_capable == False:
5916 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5918 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5919 " the vm_capable flag" % node.name,
5922 if node.master_candidate and self.might_demote and not self.lock_all:
5923 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5924 # check if, after removing the current node, we're missing master candidates
5926 (mc_remaining, mc_should, _) = \
5927 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5928 if mc_remaining < mc_should:
5929 raise errors.OpPrereqError("Not enough master candidates, please"
5930 " pass auto promote option to allow"
5931 " promotion", errors.ECODE_STATE)
5933 self.old_flags = old_flags = (node.master_candidate,
5934 node.drained, node.offline)
5935 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5936 self.old_role = old_role = self._F2R[old_flags]
5938 # Check for ineffective changes
5939 for attr in self._FLAGS:
5940 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5941 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5942 setattr(self.op, attr, None)
5944 # Past this point, any flag change to False means a transition
5945 # away from the respective state, as only real changes are kept
5947 # TODO: We might query the real power state if it supports OOB
5948 if _SupportsOob(self.cfg, node):
5949 if self.op.offline is False and not (node.powered or
5950 self.op.powered == True):
5951 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5952 " offline status can be reset") %
5954 elif self.op.powered is not None:
5955 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5956 " as it does not support out-of-band"
5957 " handling") % self.op.node_name)
5959 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5960 if (self.op.drained == False or self.op.offline == False or
5961 (self.op.master_capable and not node.master_capable)):
5962 if _DecideSelfPromotion(self):
5963 self.op.master_candidate = True
5964 self.LogInfo("Auto-promoting node to master candidate")
5966 # If we're no longer master capable, we'll demote ourselves from MC
5967 if self.op.master_capable == False and node.master_candidate:
5968 self.LogInfo("Demoting from master candidate")
5969 self.op.master_candidate = False
5972 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5973 if self.op.master_candidate:
5974 new_role = self._ROLE_CANDIDATE
5975 elif self.op.drained:
5976 new_role = self._ROLE_DRAINED
5977 elif self.op.offline:
5978 new_role = self._ROLE_OFFLINE
5979 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5980 # False is still in new flags, which means we're un-setting (the offline state, for example)
5982 new_role = self._ROLE_REGULAR
5983 else: # no new flags, nothing, keep old role
5986 self.new_role = new_role
5988 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5989 # Trying to transition out of offline status
5990 result = self.rpc.call_version([node.name])[node.name]
5992 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5993 " to report its version: %s" %
5994 (node.name, result.fail_msg),
5997 self.LogWarning("Transitioning node from offline to online state"
5998 " without using re-add. Please make sure the node"
6001 if self.op.secondary_ip:
6002 # Ok even without locking, because this can't be changed by any LU
6003 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6004 master_singlehomed = master.secondary_ip == master.primary_ip
6005 if master_singlehomed and self.op.secondary_ip:
6006 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6007 " homed cluster", errors.ECODE_INVAL)
6009 assert not (frozenset(affected_instances) -
6010 self.owned_locks(locking.LEVEL_INSTANCE))
6013 if affected_instances:
6014 raise errors.OpPrereqError("Cannot change secondary IP address:"
6015 " offline node has instances (%s)"
6016 " configured to use it" %
6017 utils.CommaJoin(affected_instances.keys()))
6019 # On online nodes, check that no instances are running, and that
6020 # the node has the new ip and we can reach it.
6021 for instance in affected_instances.values():
6022 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6023 msg="cannot change secondary ip")
6025 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6026 if master.name != node.name:
6027 # check reachability from master secondary ip to new secondary ip
6028 if not netutils.TcpPing(self.op.secondary_ip,
6029 constants.DEFAULT_NODED_PORT,
6030 source=master.secondary_ip):
6031 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6032 " based ping to node daemon port",
6033 errors.ECODE_ENVIRON)
6035 if self.op.ndparams:
6036 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6037 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6038 self.new_ndparams = new_ndparams
6040 if self.op.hv_state:
6041 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6042 self.node.hv_state_static)
6044 if self.op.disk_state:
6045 self.new_disk_state = \
6046 _MergeAndVerifyDiskState(self.op.disk_state,
6047 self.node.disk_state_static)
6049 def Exec(self, feedback_fn):
6054 old_role = self.old_role
6055 new_role = self.new_role
6059 if self.op.ndparams:
6060 node.ndparams = self.new_ndparams
6062 if self.op.powered is not None:
6063 node.powered = self.op.powered
6065 if self.op.hv_state:
6066 node.hv_state_static = self.new_hv_state
6068 if self.op.disk_state:
6069 node.disk_state_static = self.new_disk_state
6071 for attr in ["master_capable", "vm_capable"]:
6072 val = getattr(self.op, attr)
6074 setattr(node, attr, val)
6075 result.append((attr, str(val)))
6077 if new_role != old_role:
6078 # Tell the node to demote itself, if no longer MC and not offline
6079 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6080 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6082 self.LogWarning("Node failed to demote itself: %s", msg)
6084 new_flags = self._R2F[new_role]
6085 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6087 result.append((desc, str(nf)))
6088 (node.master_candidate, node.drained, node.offline) = new_flags
6090 # we locked all nodes, so we adjust the candidate pool before updating this node
6092 _AdjustCandidatePool(self, [node.name])
6094 if self.op.secondary_ip:
6095 node.secondary_ip = self.op.secondary_ip
6096 result.append(("secondary_ip", self.op.secondary_ip))
6098 # this will trigger configuration file update, if needed
6099 self.cfg.Update(node, feedback_fn)
6101 # this will trigger job queue propagation or cleanup if the mc flag changed
6103 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6104 self.context.ReaddNode(node)
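
# Illustrative sketch (not used by this module): LUNodeSetParams above encodes
# a node's role as a tuple of the (master_candidate, drained, offline) flags
# via the _F2R/_R2F class attributes. The self-contained snippet below mirrors
# that mapping to make the round trip explicit; the dictionaries and role
# names are restated here purely for illustration.

# Flag tuples are (master_candidate, drained, offline); at most one flag may
# be set at a time, and the all-False tuple is the regular role.
_EXAMPLE_ROLES = {
  (True, False, False): "candidate",
  (False, True, False): "drained",
  (False, False, True): "offline",
  (False, False, False): "regular",
}
_EXAMPLE_FLAGS = dict((v, k) for k, v in _EXAMPLE_ROLES.items())


def _example_role_roundtrip(master_candidate, drained, offline):
  """Maps a flag tuple to a role name and back, as _F2R/_R2F do."""
  role = _EXAMPLE_ROLES[(master_candidate, drained, offline)]
  return role, _EXAMPLE_FLAGS[role]
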
6109 class LUNodePowercycle(NoHooksLU):
6110 """Powercycles a node.
6115 def CheckArguments(self):
6116 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6117 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6118 raise errors.OpPrereqError("The node is the master and the force"
6119 " parameter was not set",
6122 def ExpandNames(self):
6123 """Locking for PowercycleNode.
6125 This is a last-resort option and shouldn't block on other
6126 jobs. Therefore, we grab no locks.
6129 self.needed_locks = {}
6131 def Exec(self, feedback_fn):
6135 result = self.rpc.call_node_powercycle(self.op.node_name,
6136 self.cfg.GetHypervisorType())
6137 result.Raise("Failed to schedule the reboot")
6138 return result.payload
6141 class LUClusterQuery(NoHooksLU):
6142 """Query cluster configuration.
6147 def ExpandNames(self):
6148 self.needed_locks = {}
6150 def Exec(self, feedback_fn):
6151 """Return cluster config.
6154 cluster = self.cfg.GetClusterInfo()
6157 # Filter just for enabled hypervisors
6158 for os_name, hv_dict in cluster.os_hvp.items():
6159 os_hvp[os_name] = {}
6160 for hv_name, hv_params in hv_dict.items():
6161 if hv_name in cluster.enabled_hypervisors:
6162 os_hvp[os_name][hv_name] = hv_params
6164 # Convert ip_family to ip_version
6165 primary_ip_version = constants.IP4_VERSION
6166 if cluster.primary_ip_family == netutils.IP6Address.family:
6167 primary_ip_version = constants.IP6_VERSION
6170 "software_version": constants.RELEASE_VERSION,
6171 "protocol_version": constants.PROTOCOL_VERSION,
6172 "config_version": constants.CONFIG_VERSION,
6173 "os_api_version": max(constants.OS_API_VERSIONS),
6174 "export_version": constants.EXPORT_VERSION,
6175 "architecture": runtime.GetArchInfo(),
6176 "name": cluster.cluster_name,
6177 "master": cluster.master_node,
6178 "default_hypervisor": cluster.primary_hypervisor,
6179 "enabled_hypervisors": cluster.enabled_hypervisors,
6180 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6181 for hypervisor_name in cluster.enabled_hypervisors]),
6183 "beparams": cluster.beparams,
6184 "osparams": cluster.osparams,
6185 "ipolicy": cluster.ipolicy,
6186 "nicparams": cluster.nicparams,
6187 "ndparams": cluster.ndparams,
6188 "diskparams": cluster.diskparams,
6189 "candidate_pool_size": cluster.candidate_pool_size,
6190 "master_netdev": cluster.master_netdev,
6191 "master_netmask": cluster.master_netmask,
6192 "use_external_mip_script": cluster.use_external_mip_script,
6193 "volume_group_name": cluster.volume_group_name,
6194 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6195 "file_storage_dir": cluster.file_storage_dir,
6196 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6197 "maintain_node_health": cluster.maintain_node_health,
6198 "ctime": cluster.ctime,
6199 "mtime": cluster.mtime,
6200 "uuid": cluster.uuid,
6201 "tags": list(cluster.GetTags()),
6202 "uid_pool": cluster.uid_pool,
6203 "default_iallocator": cluster.default_iallocator,
6204 "reserved_lvs": cluster.reserved_lvs,
6205 "primary_ip_version": primary_ip_version,
6206 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6207 "hidden_os": cluster.hidden_os,
6208 "blacklisted_os": cluster.blacklisted_os,
6214 class LUClusterConfigQuery(NoHooksLU):
6215 """Return configuration values.
6220 def CheckArguments(self):
6221 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6223 def ExpandNames(self):
6224 self.cq.ExpandNames(self)
6226 def DeclareLocks(self, level):
6227 self.cq.DeclareLocks(self, level)
6229 def Exec(self, feedback_fn):
6230 result = self.cq.OldStyleQuery(self)
6232 assert len(result) == 1
6237 class _ClusterQuery(_QueryBase):
6238 FIELDS = query.CLUSTER_FIELDS
6240 #: Do not sort (there is only one item)
6243 def ExpandNames(self, lu):
6244 lu.needed_locks = {}
6246 # The following variables interact with _QueryBase._GetNames
6247 self.wanted = locking.ALL_SET
6248 self.do_locking = self.use_locking
6251 raise errors.OpPrereqError("Can not use locking for cluster queries",
6254 def DeclareLocks(self, lu, level):
6257 def _GetQueryData(self, lu):
6258 """Computes the list of nodes and their attributes.
6261 # Locking is not used
6262 assert not (compat.any(lu.glm.is_owned(level)
6263 for level in locking.LEVELS
6264 if level != locking.LEVEL_CLUSTER) or
6265 self.do_locking or self.use_locking)
6267 if query.CQ_CONFIG in self.requested_data:
6268 cluster = lu.cfg.GetClusterInfo()
6270 cluster = NotImplemented
6272 if query.CQ_QUEUE_DRAINED in self.requested_data:
6273 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6275 drain_flag = NotImplemented
6277 if query.CQ_WATCHER_PAUSE in self.requested_data:
6278 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6280 watcher_pause = NotImplemented
6282 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6285 class LUInstanceActivateDisks(NoHooksLU):
6286 """Bring up an instance's disks.
6291 def ExpandNames(self):
6292 self._ExpandAndLockInstance()
6293 self.needed_locks[locking.LEVEL_NODE] = []
6294 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6296 def DeclareLocks(self, level):
6297 if level == locking.LEVEL_NODE:
6298 self._LockInstancesNodes()
6300 def CheckPrereq(self):
6301 """Check prerequisites.
6303 This checks that the instance is in the cluster.
6306 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6307 assert self.instance is not None, \
6308 "Cannot retrieve locked instance %s" % self.op.instance_name
6309 _CheckNodeOnline(self, self.instance.primary_node)
6311 def Exec(self, feedback_fn):
6312 """Activate the disks.
6315 disks_ok, disks_info = \
6316 _AssembleInstanceDisks(self, self.instance,
6317 ignore_size=self.op.ignore_size)
6319 raise errors.OpExecError("Cannot activate block devices")
6324 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6326 """Prepare the block devices for an instance.
6328 This sets up the block devices on all nodes.
6330 @type lu: L{LogicalUnit}
6331 @param lu: the logical unit on whose behalf we execute
6332 @type instance: L{objects.Instance}
6333 @param instance: the instance for whose disks we assemble
6334 @type disks: list of L{objects.Disk} or None
6335 @param disks: which disks to assemble (or all, if None)
6336 @type ignore_secondaries: boolean
6337 @param ignore_secondaries: if true, errors on secondary nodes
6338 won't result in an error return from the function
6339 @type ignore_size: boolean
6340 @param ignore_size: if true, the current known size of the disk
6341 will not be used during the disk activation, useful for cases
6342 when the size is wrong
6343 @return: False if the operation failed, otherwise a list of
6344 (host, instance_visible_name, node_visible_name)
6345 with the mapping from node devices to instance devices
6350 iname = instance.name
6351 disks = _ExpandCheckDisks(instance, disks)
6353 # With the two passes mechanism we try to reduce the window of
6354 # opportunity for the race condition of switching DRBD to primary
6355 # before handshaking occurred, but we do not eliminate it
6357 # The proper fix would be to wait (with some limits) until the
6358 # connection has been made and drbd transitions from WFConnection
6359 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6362 # 1st pass, assemble on all nodes in secondary mode
6363 for idx, inst_disk in enumerate(disks):
6364 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6366 node_disk = node_disk.Copy()
6367 node_disk.UnsetSize()
6368 lu.cfg.SetDiskID(node_disk, node)
6369 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6371 msg = result.fail_msg
6373 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6374 " (is_primary=False, pass=1): %s",
6375 inst_disk.iv_name, node, msg)
6376 if not ignore_secondaries:
6379 # FIXME: race condition on drbd migration to primary
6381 # 2nd pass, do only the primary node
6382 for idx, inst_disk in enumerate(disks):
6385 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6386 if node != instance.primary_node:
6389 node_disk = node_disk.Copy()
6390 node_disk.UnsetSize()
6391 lu.cfg.SetDiskID(node_disk, node)
6392 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6394 msg = result.fail_msg
6396 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6397 " (is_primary=True, pass=2): %s",
6398 inst_disk.iv_name, node, msg)
6401 dev_path = result.payload
6403 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6405 # leave the disks configured for the primary node
6406 # this is a workaround that would be better fixed by
6407 # improving the logical/physical id handling
6409 lu.cfg.SetDiskID(disk, instance.primary_node)
6411 return disks_ok, device_info
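
# Illustrative sketch (not used by this module): the two-pass structure of
# _AssembleInstanceDisks above (all nodes in secondary mode first, then only
# the primary node in primary mode) is easy to miss inside the per-node loops.
# The helper below captures just that ordering with a caller-supplied assemble
# callback; the names and signature are assumptions made for illustration.
def _example_two_pass_assemble(disks, primary_node, nodes_of, assemble_fn):
  """Assembles disks on all nodes in secondary mode, then on the primary.

  @param disks: iterable of opaque disk objects
  @param primary_node: name of the primary node
  @param nodes_of: callable returning the node names a disk lives on
  @param assemble_fn: callable(node, disk, as_primary) returning success

  @return: True if every call succeeded

  """
  ok = True
  # 1st pass: every node in secondary (non-primary) mode
  for disk in disks:
    for node in nodes_of(disk):
      ok = assemble_fn(node, disk, False) and ok
  # 2nd pass: only the primary node, now in primary mode
  for disk in disks:
    ok = assemble_fn(primary_node, disk, True) and ok
  return ok
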
6414 def _StartInstanceDisks(lu, instance, force):
6415 """Start the disks of an instance.
6418 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6419 ignore_secondaries=force)
6421 _ShutdownInstanceDisks(lu, instance)
6422 if force is not None and not force:
6423 lu.proc.LogWarning("", hint="If the message above refers to a"
6425 " you can retry the operation using '--force'.")
6426 raise errors.OpExecError("Disk consistency error")
6429 class LUInstanceDeactivateDisks(NoHooksLU):
6430 """Shutdown an instance's disks.
6435 def ExpandNames(self):
6436 self._ExpandAndLockInstance()
6437 self.needed_locks[locking.LEVEL_NODE] = []
6438 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6440 def DeclareLocks(self, level):
6441 if level == locking.LEVEL_NODE:
6442 self._LockInstancesNodes()
6444 def CheckPrereq(self):
6445 """Check prerequisites.
6447 This checks that the instance is in the cluster.
6450 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6451 assert self.instance is not None, \
6452 "Cannot retrieve locked instance %s" % self.op.instance_name
6454 def Exec(self, feedback_fn):
6455 """Deactivate the disks
6458 instance = self.instance
6460 _ShutdownInstanceDisks(self, instance)
6462 _SafeShutdownInstanceDisks(self, instance)
6465 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6466 """Shutdown block devices of an instance.
6468 This function checks if an instance is running, before calling
6469 _ShutdownInstanceDisks.
6472 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6473 _ShutdownInstanceDisks(lu, instance, disks=disks)
6476 def _ExpandCheckDisks(instance, disks):
6477 """Return the instance disks selected by the disks list
6479 @type disks: list of L{objects.Disk} or None
6480 @param disks: selected disks
6481 @rtype: list of L{objects.Disk}
6482 @return: selected instance disks to act on
6486 return instance.disks
6488 if not set(disks).issubset(instance.disks):
6489 raise errors.ProgrammerError("Can only act on disks belonging to the"
6494 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6495 """Shutdown block devices of an instance.
6497 This does the shutdown on all nodes of the instance.
6499 If ignore_primary is false, errors on the primary node are reported as failures; otherwise they are ignored.
6504 disks = _ExpandCheckDisks(instance, disks)
6507 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6508 lu.cfg.SetDiskID(top_disk, node)
6509 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6510 msg = result.fail_msg
6512 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6513 disk.iv_name, node, msg)
6514 if ((node == instance.primary_node and not ignore_primary) or
6515 (node != instance.primary_node and not result.offline)):
6520 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6521 """Checks if a node has enough free memory.
6523 This function checks if a given node has the needed amount of free
6524 memory. In case the node has less memory or we cannot get the
6525 information from the node, this function raises an OpPrereqError
6528 @type lu: C{LogicalUnit}
6529 @param lu: a logical unit from which we get configuration data
6531 @param node: the node to check
6532 @type reason: C{str}
6533 @param reason: string to use in the error message
6534 @type requested: C{int}
6535 @param requested: the amount of memory in MiB to check for
6536 @type hypervisor_name: C{str}
6537 @param hypervisor_name: the hypervisor to ask for memory stats
6539 @return: node current free memory
6540 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6541 we cannot check the node
6544 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6545 nodeinfo[node].Raise("Can't get data from node %s" % node,
6546 prereq=True, ecode=errors.ECODE_ENVIRON)
6547 (_, _, (hv_info, )) = nodeinfo[node].payload
6549 free_mem = hv_info.get("memory_free", None)
6550 if not isinstance(free_mem, int):
6551 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6552 " was '%s'" % (node, free_mem),
6553 errors.ECODE_ENVIRON)
6554 if requested > free_mem:
6555 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6556 " needed %s MiB, available %s MiB" %
6557 (node, reason, requested, free_mem),
6562 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6563 """Checks if nodes have enough free disk space in the all VGs.
6565 This function checks if all given nodes have the needed amount of
6566 free disk. In case any node has less disk or we cannot get the
6567 information from the node, this function raises an OpPrereqError
6570 @type lu: C{LogicalUnit}
6571 @param lu: a logical unit from which we get configuration data
6572 @type nodenames: C{list}
6573 @param nodenames: the list of node names to check
6574 @type req_sizes: C{dict}
6575 @param req_sizes: the hash of vg and corresponding amount of disk in
6577 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6578 or we cannot check the node
6581 for vg, req_size in req_sizes.items():
6582 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
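
# Illustrative sketch (not used by this module): _CheckNodesFreeDiskPerVG
# above expects req_sizes as a mapping from volume-group name to the space
# needed in that group, in MiB. The helper below shows one plausible way such
# a dict could be built; the VG names and sizes in the usage comment are made
# up for illustration.
def _example_disk_requirements(disk_specs):
  """Builds a req_sizes-style dict (VG name -> total MiB) from disk specs.

  @param disk_specs: iterable of (vg_name, size_mib) pairs

  """
  req_sizes = {}
  for vg_name, size_mib in disk_specs:
    req_sizes[vg_name] = req_sizes.get(vg_name, 0) + size_mib
  return req_sizes

# e.g. _example_disk_requirements([("xenvg", 10240), ("xenvg", 2048)])
# returns {"xenvg": 12288}, which is the shape req_sizes is expected to have.
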
6585 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6586 """Checks if nodes have enough free disk space in the specified VG.
6588 This function checks if all given nodes have the needed amount of
6589 free disk. In case any node has less disk or we cannot get the
6590 information from the node, this function raises an OpPrereqError
6593 @type lu: C{LogicalUnit}
6594 @param lu: a logical unit from which we get configuration data
6595 @type nodenames: C{list}
6596 @param nodenames: the list of node names to check
6598 @param vg: the volume group to check
6599 @type requested: C{int}
6600 @param requested: the amount of disk in MiB to check for
6601 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6602 or we cannot check the node
6605 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6606 for node in nodenames:
6607 info = nodeinfo[node]
6608 info.Raise("Cannot get current information from node %s" % node,
6609 prereq=True, ecode=errors.ECODE_ENVIRON)
6610 (_, (vg_info, ), _) = info.payload
6611 vg_free = vg_info.get("vg_free", None)
6612 if not isinstance(vg_free, int):
6613 raise errors.OpPrereqError("Can't compute free disk space on node"
6614 " %s for vg %s, result was '%s'" %
6615 (node, vg, vg_free), errors.ECODE_ENVIRON)
6616 if requested > vg_free:
6617 raise errors.OpPrereqError("Not enough disk space on target node %s"
6618 " vg %s: required %d MiB, available %d MiB" %
6619 (node, vg, requested, vg_free),
6623 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6624 """Checks if nodes have enough physical CPUs
6626 This function checks if all given nodes have the needed number of
6627 physical CPUs. In case any node has fewer CPUs or we cannot get the
6628 information from the node, this function raises an OpPrereqError
6631 @type lu: C{LogicalUnit}
6632 @param lu: a logical unit from which we get configuration data
6633 @type nodenames: C{list}
6634 @param nodenames: the list of node names to check
6635 @type requested: C{int}
6636 @param requested: the minimum acceptable number of physical CPUs
6637 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6638 or we cannot check the node
6641 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6642 for node in nodenames:
6643 info = nodeinfo[node]
6644 info.Raise("Cannot get current information from node %s" % node,
6645 prereq=True, ecode=errors.ECODE_ENVIRON)
6646 (_, _, (hv_info, )) = info.payload
6647 num_cpus = hv_info.get("cpu_total", None)
6648 if not isinstance(num_cpus, int):
6649 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6650 " on node %s, result was '%s'" %
6651 (node, num_cpus), errors.ECODE_ENVIRON)
6652 if requested > num_cpus:
6653 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6654 "required" % (node, num_cpus, requested),
6658 class LUInstanceStartup(LogicalUnit):
6659 """Starts an instance.
6662 HPATH = "instance-start"
6663 HTYPE = constants.HTYPE_INSTANCE
6666 def CheckArguments(self):
6668 if self.op.beparams:
6669 # fill the beparams dict
6670 objects.UpgradeBeParams(self.op.beparams)
6671 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6673 def ExpandNames(self):
6674 self._ExpandAndLockInstance()
6675 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6677 def DeclareLocks(self, level):
6678 if level == locking.LEVEL_NODE_RES:
6679 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6681 def BuildHooksEnv(self):
6684 This runs on master, primary and secondary nodes of the instance.
6688 "FORCE": self.op.force,
6691 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6695 def BuildHooksNodes(self):
6696 """Build hooks nodes.
6699 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6702 def CheckPrereq(self):
6703 """Check prerequisites.
6705 This checks that the instance is in the cluster.
6708 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6709 assert self.instance is not None, \
6710 "Cannot retrieve locked instance %s" % self.op.instance_name
6713 if self.op.hvparams:
6714 # check hypervisor parameter syntax (locally)
6715 cluster = self.cfg.GetClusterInfo()
6716 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6717 filled_hvp = cluster.FillHV(instance)
6718 filled_hvp.update(self.op.hvparams)
6719 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6720 hv_type.CheckParameterSyntax(filled_hvp)
6721 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6723 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6725 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6727 if self.primary_offline and self.op.ignore_offline_nodes:
6728 self.proc.LogWarning("Ignoring offline primary node")
6730 if self.op.hvparams or self.op.beparams:
6731 self.proc.LogWarning("Overridden parameters are ignored")
6733 _CheckNodeOnline(self, instance.primary_node)
6735 bep = self.cfg.GetClusterInfo().FillBE(instance)
6736 bep.update(self.op.beparams)
6738 # check bridges existence
6739 _CheckInstanceBridgesExist(self, instance)
6741 remote_info = self.rpc.call_instance_info(instance.primary_node,
6743 instance.hypervisor)
6744 remote_info.Raise("Error checking node %s" % instance.primary_node,
6745 prereq=True, ecode=errors.ECODE_ENVIRON)
6746 if not remote_info.payload: # not running already
6747 _CheckNodeFreeMemory(self, instance.primary_node,
6748 "starting instance %s" % instance.name,
6749 bep[constants.BE_MINMEM], instance.hypervisor)
6751 def Exec(self, feedback_fn):
6752 """Start the instance.
6755 instance = self.instance
6756 force = self.op.force
6758 if not self.op.no_remember:
6759 self.cfg.MarkInstanceUp(instance.name)
6761 if self.primary_offline:
6762 assert self.op.ignore_offline_nodes
6763 self.proc.LogInfo("Primary node offline, marked instance as started")
6765 node_current = instance.primary_node
6767 _StartInstanceDisks(self, instance, force)
6770 self.rpc.call_instance_start(node_current,
6771 (instance, self.op.hvparams,
6773 self.op.startup_paused)
6774 msg = result.fail_msg
6776 _ShutdownInstanceDisks(self, instance)
6777 raise errors.OpExecError("Could not start instance: %s" % msg)
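
# Illustrative sketch (not used by this module): the startup path above
# follows an assemble-then-start pattern with an explicit rollback: if
# starting the instance fails, the freshly assembled disks are shut down
# again before the error is propagated. The helper below isolates that
# control flow; the callback parameters are assumptions made for
# illustration.
def _example_start_with_rollback(start_disks_fn, start_instance_fn,
                                 shutdown_disks_fn):
  """Starts disks, then the instance, rolling the disks back on failure."""
  start_disks_fn()
  error = start_instance_fn()  # returns an error message or None
  if error:
    shutdown_disks_fn()
    raise RuntimeError("Could not start instance: %s" % error)
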
6780 class LUInstanceReboot(LogicalUnit):
6781 """Reboot an instance.
6784 HPATH = "instance-reboot"
6785 HTYPE = constants.HTYPE_INSTANCE
6788 def ExpandNames(self):
6789 self._ExpandAndLockInstance()
6791 def BuildHooksEnv(self):
6794 This runs on master, primary and secondary nodes of the instance.
6798 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6799 "REBOOT_TYPE": self.op.reboot_type,
6800 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6803 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6807 def BuildHooksNodes(self):
6808 """Build hooks nodes.
6811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6814 def CheckPrereq(self):
6815 """Check prerequisites.
6817 This checks that the instance is in the cluster.
6820 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6821 assert self.instance is not None, \
6822 "Cannot retrieve locked instance %s" % self.op.instance_name
6823 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6824 _CheckNodeOnline(self, instance.primary_node)
6826 # check bridges existence
6827 _CheckInstanceBridgesExist(self, instance)
6829 def Exec(self, feedback_fn):
6830 """Reboot the instance.
6833 instance = self.instance
6834 ignore_secondaries = self.op.ignore_secondaries
6835 reboot_type = self.op.reboot_type
6837 remote_info = self.rpc.call_instance_info(instance.primary_node,
6839 instance.hypervisor)
6840 remote_info.Raise("Error checking node %s" % instance.primary_node)
6841 instance_running = bool(remote_info.payload)
6843 node_current = instance.primary_node
6845 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6846 constants.INSTANCE_REBOOT_HARD]:
6847 for disk in instance.disks:
6848 self.cfg.SetDiskID(disk, node_current)
6849 result = self.rpc.call_instance_reboot(node_current, instance,
6851 self.op.shutdown_timeout)
6852 result.Raise("Could not reboot instance")
6854 if instance_running:
6855 result = self.rpc.call_instance_shutdown(node_current, instance,
6856 self.op.shutdown_timeout)
6857 result.Raise("Could not shutdown instance for full reboot")
6858 _ShutdownInstanceDisks(self, instance)
6860 self.LogInfo("Instance %s was already stopped, starting now",
6862 _StartInstanceDisks(self, instance, ignore_secondaries)
6863 result = self.rpc.call_instance_start(node_current,
6864 (instance, None, None), False)
6865 msg = result.fail_msg
6867 _ShutdownInstanceDisks(self, instance)
6868 raise errors.OpExecError("Could not start instance for"
6869 " full reboot: %s" % msg)
6871 self.cfg.MarkInstanceUp(instance.name)
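
# Illustrative sketch (not used by this module): LUInstanceReboot.Exec above
# distinguishes soft/hard reboots (delegated to the hypervisor while the
# instance keeps running) from a full reboot (shutdown, disk deactivation,
# then a fresh start). The helper below restates that dispatch in isolation;
# the string constants and callbacks are assumptions made for illustration.
def _example_reboot(reboot_type, instance_running, hypervisor_reboot_fn,
                    full_stop_fn, full_start_fn, log_fn):
  """Dispatches a reboot request along the lines of LUInstanceReboot."""
  soft_or_hard = reboot_type in ("soft", "hard")
  if instance_running and soft_or_hard:
    # the hypervisor reboots the instance in place
    hypervisor_reboot_fn()
  else:
    if instance_running:
      full_stop_fn()  # shut down the instance and deactivate its disks
    else:
      log_fn("Instance was already stopped, starting now")
    full_start_fn()  # activate disks and start the instance again
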
6874 class LUInstanceShutdown(LogicalUnit):
6875 """Shutdown an instance.
6878 HPATH = "instance-stop"
6879 HTYPE = constants.HTYPE_INSTANCE
6882 def ExpandNames(self):
6883 self._ExpandAndLockInstance()
6885 def BuildHooksEnv(self):
6888 This runs on master, primary and secondary nodes of the instance.
6891 env = _BuildInstanceHookEnvByObject(self, self.instance)
6892 env["TIMEOUT"] = self.op.timeout
6895 def BuildHooksNodes(self):
6896 """Build hooks nodes.
6899 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6902 def CheckPrereq(self):
6903 """Check prerequisites.
6905 This checks that the instance is in the cluster.
6908 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6909 assert self.instance is not None, \
6910 "Cannot retrieve locked instance %s" % self.op.instance_name
6912 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6914 self.primary_offline = \
6915 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6917 if self.primary_offline and self.op.ignore_offline_nodes:
6918 self.proc.LogWarning("Ignoring offline primary node")
6920 _CheckNodeOnline(self, self.instance.primary_node)
6922 def Exec(self, feedback_fn):
6923 """Shutdown the instance.
6926 instance = self.instance
6927 node_current = instance.primary_node
6928 timeout = self.op.timeout
6930 if not self.op.no_remember:
6931 self.cfg.MarkInstanceDown(instance.name)
6933 if self.primary_offline:
6934 assert self.op.ignore_offline_nodes
6935 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6937 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6938 msg = result.fail_msg
6940 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6942 _ShutdownInstanceDisks(self, instance)
6945 class LUInstanceReinstall(LogicalUnit):
6946 """Reinstall an instance.
6949 HPATH = "instance-reinstall"
6950 HTYPE = constants.HTYPE_INSTANCE
6953 def ExpandNames(self):
6954 self._ExpandAndLockInstance()
6956 def BuildHooksEnv(self):
6959 This runs on master, primary and secondary nodes of the instance.
6962 return _BuildInstanceHookEnvByObject(self, self.instance)
6964 def BuildHooksNodes(self):
6965 """Build hooks nodes.
6968 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6971 def CheckPrereq(self):
6972 """Check prerequisites.
6974 This checks that the instance is in the cluster and is not running.
6977 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6978 assert instance is not None, \
6979 "Cannot retrieve locked instance %s" % self.op.instance_name
6980 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6981 " offline, cannot reinstall")
6982 for node in instance.secondary_nodes:
6983 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6984 " cannot reinstall")
6986 if instance.disk_template == constants.DT_DISKLESS:
6987 raise errors.OpPrereqError("Instance '%s' has no disks" %
6988 self.op.instance_name,
6990 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6992 if self.op.os_type is not None:
6994 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6995 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6996 instance_os = self.op.os_type
6998 instance_os = instance.os
7000 nodelist = list(instance.all_nodes)
7002 if self.op.osparams:
7003 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7004 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7005 self.os_inst = i_osdict # the new dict (without defaults)
7009 self.instance = instance
7011 def Exec(self, feedback_fn):
7012 """Reinstall the instance.
7015 inst = self.instance
7017 if self.op.os_type is not None:
7018 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7019 inst.os = self.op.os_type
7020 # Write to configuration
7021 self.cfg.Update(inst, feedback_fn)
7023 _StartInstanceDisks(self, inst, None)
7025 feedback_fn("Running the instance OS create scripts...")
7026 # FIXME: pass debug option from opcode to backend
7027 result = self.rpc.call_instance_os_add(inst.primary_node,
7028 (inst, self.os_inst), True,
7029 self.op.debug_level)
7030 result.Raise("Could not install OS for instance %s on node %s" %
7031 (inst.name, inst.primary_node))
7033 _ShutdownInstanceDisks(self, inst)
7036 class LUInstanceRecreateDisks(LogicalUnit):
7037 """Recreate an instance's missing disks.
7040 HPATH = "instance-recreate-disks"
7041 HTYPE = constants.HTYPE_INSTANCE
7044 _MODIFYABLE = frozenset([
7045 constants.IDISK_SIZE,
7046 constants.IDISK_MODE,
7049 # New or changed disk parameters may have different semantics
7050 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7051 constants.IDISK_ADOPT,
7053 # TODO: Implement support changing VG while recreating
7055 constants.IDISK_METAVG,
7058 def CheckArguments(self):
7059 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7060 # Normalize and convert deprecated list of disk indices
7061 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7063 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7065 raise errors.OpPrereqError("Some disks have been specified more than"
7066 " once: %s" % utils.CommaJoin(duplicates),
7069 for (idx, params) in self.op.disks:
7070 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7071 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7073 raise errors.OpPrereqError("Parameters for disk %s try to change"
7074 " unmodifyable parameter(s): %s" %
7075 (idx, utils.CommaJoin(unsupported)),
7078 def ExpandNames(self):
7079 self._ExpandAndLockInstance()
7080 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7082 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7083 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7085 self.needed_locks[locking.LEVEL_NODE] = []
7086 self.needed_locks[locking.LEVEL_NODE_RES] = []
7088 def DeclareLocks(self, level):
7089 if level == locking.LEVEL_NODE:
7090 # if we replace the nodes, we only need to lock the old primary,
7091 # otherwise we need to lock all nodes for disk re-creation
7092 primary_only = bool(self.op.nodes)
7093 self._LockInstancesNodes(primary_only=primary_only)
7094 elif level == locking.LEVEL_NODE_RES:
7096 self.needed_locks[locking.LEVEL_NODE_RES] = \
7097 self.needed_locks[locking.LEVEL_NODE][:]
7099 def BuildHooksEnv(self):
7102 This runs on master, primary and secondary nodes of the instance.
7105 return _BuildInstanceHookEnvByObject(self, self.instance)
7107 def BuildHooksNodes(self):
7108 """Build hooks nodes.
7111 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7114 def CheckPrereq(self):
7115 """Check prerequisites.
7117 This checks that the instance is in the cluster and is not running.
7120 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7121 assert instance is not None, \
7122 "Cannot retrieve locked instance %s" % self.op.instance_name
7124 if len(self.op.nodes) != len(instance.all_nodes):
7125 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7126 " %d replacement nodes were specified" %
7127 (instance.name, len(instance.all_nodes),
7128 len(self.op.nodes)),
7130 assert instance.disk_template != constants.DT_DRBD8 or \
7131 len(self.op.nodes) == 2
7132 assert instance.disk_template != constants.DT_PLAIN or \
7133 len(self.op.nodes) == 1
7134 primary_node = self.op.nodes[0]
7136 primary_node = instance.primary_node
7137 _CheckNodeOnline(self, primary_node)
7139 if instance.disk_template == constants.DT_DISKLESS:
7140 raise errors.OpPrereqError("Instance '%s' has no disks" %
7141 self.op.instance_name, errors.ECODE_INVAL)
7143 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7145 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7146 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7147 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7148 if not (self.op.nodes and old_pnode.offline):
7149 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7150 msg="cannot recreate disks")
7153 self.disks = dict(self.op.disks)
7155 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7157 maxidx = max(self.disks.keys())
7158 if maxidx >= len(instance.disks):
7159 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7162 if (self.op.nodes and
7163 sorted(self.disks.keys()) != range(len(instance.disks))):
7164 raise errors.OpPrereqError("Can't recreate disks partially and"
7165 " change the nodes at the same time",
7168 self.instance = instance
7170 def Exec(self, feedback_fn):
7171 """Recreate the disks.
7174 instance = self.instance
7176 assert (self.owned_locks(locking.LEVEL_NODE) ==
7177 self.owned_locks(locking.LEVEL_NODE_RES))
7180 mods = [] # keeps track of needed changes
7182 for idx, disk in enumerate(instance.disks):
7184 changes = self.disks[idx]
7186 # Disk should not be recreated
7190 # update secondaries for disks, if needed
7191 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7192 # need to update the nodes and minors
7193 assert len(self.op.nodes) == 2
7194 assert len(disk.logical_id) == 6 # otherwise disk internals
7196 (_, _, old_port, _, _, old_secret) = disk.logical_id
7197 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7198 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7199 new_minors[0], new_minors[1], old_secret)
7200 assert len(disk.logical_id) == len(new_id)
7204 mods.append((idx, new_id, changes))
7206 # now that we have passed all asserts above, we can apply the mods
7207 # in a single run (to avoid partial changes)
7208 for idx, new_id, changes in mods:
7209 disk = instance.disks[idx]
7210 if new_id is not None:
7211 assert disk.dev_type == constants.LD_DRBD8
7212 disk.logical_id = new_id
7214 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7215 mode=changes.get(constants.IDISK_MODE, None))
7217 # change primary node, if needed
7219 instance.primary_node = self.op.nodes[0]
7220 self.LogWarning("Changing the instance's nodes, you will have to"
7221 " remove any disks left on the older nodes manually")
7224 self.cfg.Update(instance, feedback_fn)
7226 _CreateDisks(self, instance, to_skip=to_skip)
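
# Illustrative sketch (not used by this module): when LUInstanceRecreateDisks
# above moves a DRBD8 disk to new nodes, it keeps the old port and shared
# secret but substitutes the new node pair and freshly allocated minors in
# the six-element logical_id tuple. The helper below shows just that tuple
# surgery; the field order follows the unpacking used in Exec above.
def _example_new_drbd_logical_id(old_logical_id, new_nodes, new_minors):
  """Rebuilds a DRBD8 logical_id for a new node pair.

  @param old_logical_id: (node_a, node_b, port, minor_a, minor_b, secret)
  @param new_nodes: pair of replacement node names
  @param new_minors: pair of freshly allocated DRBD minors

  """
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)
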
7229 class LUInstanceRename(LogicalUnit):
7230 """Rename an instance.
7233 HPATH = "instance-rename"
7234 HTYPE = constants.HTYPE_INSTANCE
7236 def CheckArguments(self):
7240 if self.op.ip_check and not self.op.name_check:
7241 # TODO: make the ip check more flexible and not depend on the name check
7242 raise errors.OpPrereqError("IP address check requires a name check",
7245 def BuildHooksEnv(self):
7248 This runs on master, primary and secondary nodes of the instance.
7251 env = _BuildInstanceHookEnvByObject(self, self.instance)
7252 env["INSTANCE_NEW_NAME"] = self.op.new_name
7255 def BuildHooksNodes(self):
7256 """Build hooks nodes.
7259 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7262 def CheckPrereq(self):
7263 """Check prerequisites.
7265 This checks that the instance is in the cluster and is not running.
7268 self.op.instance_name = _ExpandInstanceName(self.cfg,
7269 self.op.instance_name)
7270 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7271 assert instance is not None
7272 _CheckNodeOnline(self, instance.primary_node)
7273 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7274 msg="cannot rename")
7275 self.instance = instance
7277 new_name = self.op.new_name
7278 if self.op.name_check:
7279 hostname = netutils.GetHostname(name=new_name)
7280 if hostname.name != new_name:
7281 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7283 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7284 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7285 " same as given hostname '%s'") %
7286 (hostname.name, self.op.new_name),
7288 new_name = self.op.new_name = hostname.name
7289 if (self.op.ip_check and
7290 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7291 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7292 (hostname.ip, new_name),
7293 errors.ECODE_NOTUNIQUE)
7295 instance_list = self.cfg.GetInstanceList()
7296 if new_name in instance_list and new_name != instance.name:
7297 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7298 new_name, errors.ECODE_EXISTS)
7300 def Exec(self, feedback_fn):
7301 """Rename the instance.
7304 inst = self.instance
7305 old_name = inst.name
7307 rename_file_storage = False
7308 if (inst.disk_template in constants.DTS_FILEBASED and
7309 self.op.new_name != inst.name):
7310 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7311 rename_file_storage = True
7313 self.cfg.RenameInstance(inst.name, self.op.new_name)
7314 # Change the instance lock. This is definitely safe while we hold the BGL.
7315 # Otherwise the new lock would have to be added in acquired mode.
7317 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7318 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7320 # re-read the instance from the configuration after rename
7321 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7323 if rename_file_storage:
7324 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7325 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7326 old_file_storage_dir,
7327 new_file_storage_dir)
7328 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7329 " (but the instance has been renamed in Ganeti)" %
7330 (inst.primary_node, old_file_storage_dir,
7331 new_file_storage_dir))
7333 _StartInstanceDisks(self, inst, None)
7335 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7336 old_name, self.op.debug_level)
7337 msg = result.fail_msg
7339 msg = ("Could not run OS rename script for instance %s on node %s"
7340 " (but the instance has been renamed in Ganeti): %s" %
7341 (inst.name, inst.primary_node, msg))
7342 self.proc.LogWarning(msg)
7344 _ShutdownInstanceDisks(self, inst)
7349 class LUInstanceRemove(LogicalUnit):
7350 """Remove an instance.
7353 HPATH = "instance-remove"
7354 HTYPE = constants.HTYPE_INSTANCE
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7359 self.needed_locks[locking.LEVEL_NODE] = []
7360 self.needed_locks[locking.LEVEL_NODE_RES] = []
7361 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7363 def DeclareLocks(self, level):
7364 if level == locking.LEVEL_NODE:
7365 self._LockInstancesNodes()
7366 elif level == locking.LEVEL_NODE_RES:
7368 self.needed_locks[locking.LEVEL_NODE_RES] = \
7369 self.needed_locks[locking.LEVEL_NODE][:]
7371 def BuildHooksEnv(self):
7374 This runs on master, primary and secondary nodes of the instance.
7377 env = _BuildInstanceHookEnvByObject(self, self.instance)
7378 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7381 def BuildHooksNodes(self):
7382 """Build hooks nodes.
7385 nl = [self.cfg.GetMasterNode()]
7386 nl_post = list(self.instance.all_nodes) + nl
7387 return (nl, nl_post)
7389 def CheckPrereq(self):
7390 """Check prerequisites.
7392 This checks that the instance is in the cluster.
7395 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7396 assert self.instance is not None, \
7397 "Cannot retrieve locked instance %s" % self.op.instance_name
7399 def Exec(self, feedback_fn):
7400 """Remove the instance.
7403 instance = self.instance
7404 logging.info("Shutting down instance %s on node %s",
7405 instance.name, instance.primary_node)
7407 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7408 self.op.shutdown_timeout)
7409 msg = result.fail_msg
7411 if self.op.ignore_failures:
7412 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7414 raise errors.OpExecError("Could not shutdown instance %s on"
7416 (instance.name, instance.primary_node, msg))
7418 assert (self.owned_locks(locking.LEVEL_NODE) ==
7419 self.owned_locks(locking.LEVEL_NODE_RES))
7420 assert not (set(instance.all_nodes) -
7421 self.owned_locks(locking.LEVEL_NODE)), \
7422 "Not owning correct locks"
7424 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7427 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7428 """Utility function to remove an instance.
7431 logging.info("Removing block devices for instance %s", instance.name)
7433 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7434 if not ignore_failures:
7435 raise errors.OpExecError("Can't remove instance's disks")
7436 feedback_fn("Warning: can't remove instance's disks")
7438 logging.info("Removing instance %s out of cluster config", instance.name)
7440 lu.cfg.RemoveInstance(instance.name)
7442 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7443 "Instance lock removal conflict"
7445 # Remove lock for the instance
7446 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7449 class LUInstanceQuery(NoHooksLU):
7450 """Logical unit for querying instances.
7453 # pylint: disable=W0142
7456 def CheckArguments(self):
7457 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7458 self.op.output_fields, self.op.use_locking)
7460 def ExpandNames(self):
7461 self.iq.ExpandNames(self)
7463 def DeclareLocks(self, level):
7464 self.iq.DeclareLocks(self, level)
7466 def Exec(self, feedback_fn):
7467 return self.iq.OldStyleQuery(self)
7470 class LUInstanceFailover(LogicalUnit):
7471 """Failover an instance.
7474 HPATH = "instance-failover"
7475 HTYPE = constants.HTYPE_INSTANCE
7478 def CheckArguments(self):
7479 """Check the arguments.
7482 self.iallocator = getattr(self.op, "iallocator", None)
7483 self.target_node = getattr(self.op, "target_node", None)
7485 def ExpandNames(self):
7486 self._ExpandAndLockInstance()
7488 if self.op.target_node is not None:
7489 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7491 self.needed_locks[locking.LEVEL_NODE] = []
7492 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7494 self.needed_locks[locking.LEVEL_NODE_RES] = []
7495 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7497 ignore_consistency = self.op.ignore_consistency
7498 shutdown_timeout = self.op.shutdown_timeout
7499 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7502 ignore_consistency=ignore_consistency,
7503 shutdown_timeout=shutdown_timeout,
7504 ignore_ipolicy=self.op.ignore_ipolicy)
7505 self.tasklets = [self._migrater]
7507 def DeclareLocks(self, level):
7508 if level == locking.LEVEL_NODE:
7509 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7510 if instance.disk_template in constants.DTS_EXT_MIRROR:
7511 if self.op.target_node is None:
7512 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7514 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7515 self.op.target_node]
7516 del self.recalculate_locks[locking.LEVEL_NODE]
7518 self._LockInstancesNodes()
7519 elif level == locking.LEVEL_NODE_RES:
7521 self.needed_locks[locking.LEVEL_NODE_RES] = \
7522 self.needed_locks[locking.LEVEL_NODE][:]
7524 def BuildHooksEnv(self):
7527 This runs on master, primary and secondary nodes of the instance.
7530 instance = self._migrater.instance
7531 source_node = instance.primary_node
7532 target_node = self.op.target_node
7534 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7535 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7536 "OLD_PRIMARY": source_node,
7537 "NEW_PRIMARY": target_node,
7540 if instance.disk_template in constants.DTS_INT_MIRROR:
7541 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7542 env["NEW_SECONDARY"] = source_node
7544 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7546 env.update(_BuildInstanceHookEnvByObject(self, instance))
7550 def BuildHooksNodes(self):
7551 """Build hooks nodes.
7554 instance = self._migrater.instance
7555 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7556 return (nl, nl + [instance.primary_node])
7559 class LUInstanceMigrate(LogicalUnit):
7560 """Migrate an instance.
7562 This is migration without shutting down, compared to the failover,
7563 which is done with shutdown.
7566 HPATH = "instance-migrate"
7567 HTYPE = constants.HTYPE_INSTANCE
7570 def ExpandNames(self):
7571 self._ExpandAndLockInstance()
7573 if self.op.target_node is not None:
7574 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7576 self.needed_locks[locking.LEVEL_NODE] = []
7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7579 self.needed_locks[locking.LEVEL_NODE_RES] = []
7580 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7583 TLMigrateInstance(self, self.op.instance_name,
7584 cleanup=self.op.cleanup,
7586 fallback=self.op.allow_failover,
7587 allow_runtime_changes=self.op.allow_runtime_changes,
7588 ignore_ipolicy=self.op.ignore_ipolicy)
7589 self.tasklets = [self._migrater]
7591 def DeclareLocks(self, level):
7592 if level == locking.LEVEL_NODE:
7593 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7594 if instance.disk_template in constants.DTS_EXT_MIRROR:
7595 if self.op.target_node is None:
7596 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7598 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7599 self.op.target_node]
7600 del self.recalculate_locks[locking.LEVEL_NODE]
7602 self._LockInstancesNodes()
7603 elif level == locking.LEVEL_NODE_RES:
7605 self.needed_locks[locking.LEVEL_NODE_RES] = \
7606 self.needed_locks[locking.LEVEL_NODE][:]
7608 def BuildHooksEnv(self):
7611 This runs on master, primary and secondary nodes of the instance.
7614 instance = self._migrater.instance
7615 source_node = instance.primary_node
7616 target_node = self.op.target_node
7617 env = _BuildInstanceHookEnvByObject(self, instance)
7619 "MIGRATE_LIVE": self._migrater.live,
7620 "MIGRATE_CLEANUP": self.op.cleanup,
7621 "OLD_PRIMARY": source_node,
7622 "NEW_PRIMARY": target_node,
7623 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7626 if instance.disk_template in constants.DTS_INT_MIRROR:
7627 env["OLD_SECONDARY"] = target_node
7628 env["NEW_SECONDARY"] = source_node
7630 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7634 def BuildHooksNodes(self):
7635 """Build hooks nodes.
7638 instance = self._migrater.instance
7639 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7640 return (nl, nl + [instance.primary_node])
7643 class LUInstanceMove(LogicalUnit):
7644 """Move an instance by data-copying.
7647 HPATH = "instance-move"
7648 HTYPE = constants.HTYPE_INSTANCE
7651 def ExpandNames(self):
7652 self._ExpandAndLockInstance()
7653 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7654 self.op.target_node = target_node
7655 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7656 self.needed_locks[locking.LEVEL_NODE_RES] = []
7657 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7659 def DeclareLocks(self, level):
7660 if level == locking.LEVEL_NODE:
7661 self._LockInstancesNodes(primary_only=True)
7662 elif level == locking.LEVEL_NODE_RES:
7664 self.needed_locks[locking.LEVEL_NODE_RES] = \
7665 self.needed_locks[locking.LEVEL_NODE][:]
7667 def BuildHooksEnv(self):
7670 This runs on master, primary and secondary nodes of the instance.
7674 "TARGET_NODE": self.op.target_node,
7675 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7677 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7680 def BuildHooksNodes(self):
7681 """Build hooks nodes.
7685 self.cfg.GetMasterNode(),
7686 self.instance.primary_node,
7687 self.op.target_node,
7691 def CheckPrereq(self):
7692 """Check prerequisites.
7694 This checks that the instance is in the cluster.
7697 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7698 assert self.instance is not None, \
7699 "Cannot retrieve locked instance %s" % self.op.instance_name
7701 node = self.cfg.GetNodeInfo(self.op.target_node)
7702 assert node is not None, \
7703 "Cannot retrieve locked node %s" % self.op.target_node
7705 self.target_node = target_node = node.name
7707 if target_node == instance.primary_node:
7708 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7709 (instance.name, target_node),
7712 bep = self.cfg.GetClusterInfo().FillBE(instance)
7714 for idx, dsk in enumerate(instance.disks):
7715 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7716 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7717 " cannot copy" % idx, errors.ECODE_STATE)
7719 _CheckNodeOnline(self, target_node)
7720 _CheckNodeNotDrained(self, target_node)
7721 _CheckNodeVmCapable(self, target_node)
7722 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7723 self.cfg.GetNodeGroup(node.group))
7724 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7725 ignore=self.op.ignore_ipolicy)
7727 if instance.admin_state == constants.ADMINST_UP:
7728 # check memory requirements on the target node
7729 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7730 instance.name, bep[constants.BE_MAXMEM],
7731 instance.hypervisor)
7733 self.LogInfo("Not checking memory on the target node as"
7734 " instance will not be started")
7736 # check bridge existence
7737 _CheckInstanceBridgesExist(self, instance, node=target_node)
7739 def Exec(self, feedback_fn):
7740 """Move an instance.
7742 The move is done by shutting it down on its present node, copying
7743 the data over (slow) and starting it on the new node.
7746 instance = self.instance
7748 source_node = instance.primary_node
7749 target_node = self.target_node
7751 self.LogInfo("Shutting down instance %s on source node %s",
7752 instance.name, source_node)
7754 assert (self.owned_locks(locking.LEVEL_NODE) ==
7755 self.owned_locks(locking.LEVEL_NODE_RES))
7757 result = self.rpc.call_instance_shutdown(source_node, instance,
7758 self.op.shutdown_timeout)
7759 msg = result.fail_msg
7761 if self.op.ignore_consistency:
7762 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7763 " Proceeding anyway. Please make sure node"
7764 " %s is down. Error details: %s",
7765 instance.name, source_node, source_node, msg)
7767 raise errors.OpExecError("Could not shutdown instance %s on"
7769 (instance.name, source_node, msg))
7771 # create the target disks
7773 _CreateDisks(self, instance, target_node=target_node)
7774 except errors.OpExecError:
7775 self.LogWarning("Device creation failed, reverting...")
7777 _RemoveDisks(self, instance, target_node=target_node)
7779 self.cfg.ReleaseDRBDMinors(instance.name)
7782 cluster_name = self.cfg.GetClusterInfo().cluster_name
7785 # activate, get path, copy the data over
7786 for idx, disk in enumerate(instance.disks):
7787 self.LogInfo("Copying data for disk %d", idx)
7788 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7789 instance.name, True, idx)
7791 self.LogWarning("Can't assemble newly created disk %d: %s",
7792 idx, result.fail_msg)
7793 errs.append(result.fail_msg)
7795 dev_path = result.payload
7796 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7797 target_node, dev_path,
7800 self.LogWarning("Can't copy data over for disk %d: %s",
7801 idx, result.fail_msg)
7802 errs.append(result.fail_msg)
7806 self.LogWarning("Some disks failed to copy, aborting")
7808 _RemoveDisks(self, instance, target_node=target_node)
7810 self.cfg.ReleaseDRBDMinors(instance.name)
7811 raise errors.OpExecError("Errors during disk copy: %s" %
7814 instance.primary_node = target_node
7815 self.cfg.Update(instance, feedback_fn)
7817 self.LogInfo("Removing the disks on the original node")
7818 _RemoveDisks(self, instance, target_node=source_node)
7820 # Only start the instance if it's marked as up
7821 if instance.admin_state == constants.ADMINST_UP:
7822 self.LogInfo("Starting instance %s on node %s",
7823 instance.name, target_node)
7825 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7826 ignore_secondaries=True)
7828 _ShutdownInstanceDisks(self, instance)
7829 raise errors.OpExecError("Can't activate the instance's disks")
7831 result = self.rpc.call_instance_start(target_node,
7832 (instance, None, None), False)
7833 msg = result.fail_msg
7835 _ShutdownInstanceDisks(self, instance)
7836 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7837 (instance.name, target_node, msg))
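# Illustrative sketch (not part of the original module): the per-disk copy
# loop of LUInstanceMove.Exec above, reduced to its control flow. `assemble`
# and `export` are hypothetical callables standing in for the
# blockdev_assemble / blockdev_export RPC calls; each returns an error string
# or None. Errors are collected so every disk is attempted before aborting.
def _copy_disks_sketch(disks, assemble, export):
  errs = []
  for idx, disk in enumerate(disks):
    err = assemble(disk)
    if err:
      errs.append("disk %d: assemble failed: %s" % (idx, err))
      continue
    err = export(disk)
    if err:
      errs.append("disk %d: copy failed: %s" % (idx, err))
  return errs

assert _copy_disks_sketch(["d0"], lambda d: None, lambda d: None) == []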
7840 class LUNodeMigrate(LogicalUnit):
7841 """Migrate all instances from a node.
7844 HPATH = "node-migrate"
7845 HTYPE = constants.HTYPE_NODE
7848 def CheckArguments(self):
7851 def ExpandNames(self):
7852 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7854 self.share_locks = _ShareAll()
7855 self.needed_locks = {
7856 locking.LEVEL_NODE: [self.op.node_name],
7859 def BuildHooksEnv(self):
7862 This runs on the master, the primary and all the secondaries.
7866 "NODE_NAME": self.op.node_name,
7867 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7870 def BuildHooksNodes(self):
7871 """Build hooks nodes.
7874 nl = [self.cfg.GetMasterNode()]
7877 def CheckPrereq(self):
7880 def Exec(self, feedback_fn):
7881 # Prepare jobs for migration instances
7882 allow_runtime_changes = self.op.allow_runtime_changes
7884 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7887 iallocator=self.op.iallocator,
7888 target_node=self.op.target_node,
7889 allow_runtime_changes=allow_runtime_changes,
7890 ignore_ipolicy=self.op.ignore_ipolicy)]
7891 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7894 # TODO: Run iallocator in this opcode and pass correct placement options to
7895 # OpInstanceMigrate. Since other jobs can modify the cluster between
7896 # running the iallocator and the actual migration, a good consistency model
7897 # will have to be found.
7899 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7900 frozenset([self.op.node_name]))
7902 return ResultWithJobs(jobs)
7905 class TLMigrateInstance(Tasklet):
7906 """Tasklet class for instance migration.
7909 @ivar live: whether the migration will be done live or non-live;
7910 this variable is initialized only after CheckPrereq has run
7911 @type cleanup: boolean
7912 @ivar cleanup: Whether we are cleaning up after a failed migration
7913 @type iallocator: string
7914 @ivar iallocator: The iallocator used to determine target_node
7915 @type target_node: string
7916 @ivar target_node: If given, the target_node to reallocate the instance to
7917 @type failover: boolean
7918 @ivar failover: Whether operation results in failover or migration
7919 @type fallback: boolean
7920 @ivar fallback: Whether fallback to failover is allowed if migration not
7922 @type ignore_consistency: boolean
7923 @ivar ignore_consistency: Whether we should ignore consistency between source
7925 @type shutdown_timeout: int
7926 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7927 @type ignore_ipolicy: bool
7928 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7933 _MIGRATION_POLL_INTERVAL = 1 # seconds
7934 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7936 def __init__(self, lu, instance_name, cleanup=False,
7937 failover=False, fallback=False,
7938 ignore_consistency=False,
7939 allow_runtime_changes=True,
7940 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7941 ignore_ipolicy=False):
7942 """Initializes this class.
7945 Tasklet.__init__(self, lu)
7948 self.instance_name = instance_name
7949 self.cleanup = cleanup
7950 self.live = False # will be overridden later
7951 self.failover = failover
7952 self.fallback = fallback
7953 self.ignore_consistency = ignore_consistency
7954 self.shutdown_timeout = shutdown_timeout
7955 self.ignore_ipolicy = ignore_ipolicy
7956 self.allow_runtime_changes = allow_runtime_changes
7958 def CheckPrereq(self):
7959 """Check prerequisites.
7961 This checks that the instance is in the cluster.
7964 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7965 instance = self.cfg.GetInstanceInfo(instance_name)
7966 assert instance is not None
7967 self.instance = instance
7968 cluster = self.cfg.GetClusterInfo()
7970 if (not self.cleanup and
7971 not instance.admin_state == constants.ADMINST_UP and
7972 not self.failover and self.fallback):
7973 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7974 " switching to failover")
7975 self.failover = True
7977 if instance.disk_template not in constants.DTS_MIRRORED:
7982 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7983 " %s" % (instance.disk_template, text),
7986 if instance.disk_template in constants.DTS_EXT_MIRROR:
7987 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7989 if self.lu.op.iallocator:
7990 self._RunAllocator()
7992 # We set self.target_node as it is required by
7994 self.target_node = self.lu.op.target_node
7996 # Check that the target node is correct in terms of instance policy
7997 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7998 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7999 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8000 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8001 ignore=self.ignore_ipolicy)
8003 # self.target_node is already populated, either directly or by the
8005 target_node = self.target_node
8006 if self.target_node == instance.primary_node:
8007 raise errors.OpPrereqError("Cannot migrate instance %s"
8008 " to its primary (%s)" %
8009 (instance.name, instance.primary_node))
8011 if len(self.lu.tasklets) == 1:
8012 # It is safe to release locks only when we're the only tasklet
8014 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8015 keep=[instance.primary_node, self.target_node])
8018 secondary_nodes = instance.secondary_nodes
8019 if not secondary_nodes:
8020 raise errors.ConfigurationError("No secondary node but using"
8021 " %s disk template" %
8022 instance.disk_template)
8023 target_node = secondary_nodes[0]
8024 if self.lu.op.iallocator or (self.lu.op.target_node and
8025 self.lu.op.target_node != target_node):
8027 text = "failed over"
8030 raise errors.OpPrereqError("Instances with disk template %s cannot"
8031 " be %s to arbitrary nodes"
8032 " (neither an iallocator nor a target"
8033 " node can be passed)" %
8034 (instance.disk_template, text),
8036 nodeinfo = self.cfg.GetNodeInfo(target_node)
8037 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8038 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8039 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8040 ignore=self.ignore_ipolicy)
8042 i_be = cluster.FillBE(instance)
8044 # check memory requirements on the secondary node
8045 if (not self.cleanup and
8046 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8047 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8048 "migrating instance %s" %
8050 i_be[constants.BE_MINMEM],
8051 instance.hypervisor)
8053 self.lu.LogInfo("Not checking memory on the secondary node as"
8054 " instance will not be started")
8056 # check if failover must be forced instead of migration
8057 if (not self.cleanup and not self.failover and
8058 i_be[constants.BE_ALWAYS_FAILOVER]):
8060 self.lu.LogInfo("Instance configured to always failover; fallback"
8062 self.failover = True
8064 raise errors.OpPrereqError("This instance has been configured to"
8065 " always failover, please allow failover",
8068 # check bridge existence
8069 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8071 if not self.cleanup:
8072 _CheckNodeNotDrained(self.lu, target_node)
8073 if not self.failover:
8074 result = self.rpc.call_instance_migratable(instance.primary_node,
8076 if result.fail_msg and self.fallback:
8077 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8079 self.failover = True
8081 result.Raise("Can't migrate, please use failover",
8082 prereq=True, ecode=errors.ECODE_STATE)
8084 assert not (self.failover and self.cleanup)
8086 if not self.failover:
8087 if self.lu.op.live is not None and self.lu.op.mode is not None:
8088 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8089 " parameters are accepted",
8091 if self.lu.op.live is not None:
8093 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8095 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8096 # reset the 'live' parameter to None so that repeated
8097 # invocations of CheckPrereq do not raise an exception
8098 self.lu.op.live = None
8099 elif self.lu.op.mode is None:
8100 # read the default value from the hypervisor
8101 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8102 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8104 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8106 # Failover is never live
8109 if not (self.failover or self.cleanup):
8110 remote_info = self.rpc.call_instance_info(instance.primary_node,
8112 instance.hypervisor)
8113 remote_info.Raise("Error checking instance on node %s" %
8114 instance.primary_node)
8115 instance_running = bool(remote_info.payload)
8116 if instance_running:
8117 self.current_mem = int(remote_info.payload["memory"])
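# Illustrative sketch (not part of the original module): the precedence used
# in CheckPrereq above to decide whether the migration is live. Argument
# names are illustrative; "live"/"nonlive" stand in for the HT_MIGRATION_*
# constants, and hv_default_mode for the hypervisor's HV_MIGRATION_MODE.
def _resolve_migration_mode_sketch(failover, live, mode, hv_default_mode):
  if failover:
    return False  # failover is never live
  if live is not None and mode is not None:
    raise ValueError("Only one of the 'live' and 'mode' parameters is accepted")
  if live is not None:
    mode = "live" if live else "nonlive"
  elif mode is None:
    mode = hv_default_mode  # fall back to the hypervisor default
  return mode == "live"

assert _resolve_migration_mode_sketch(False, None, None, "live") is True
assert _resolve_migration_mode_sketch(True, None, None, "live") is False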
8119 def _RunAllocator(self):
8120 """Run the allocator based on input opcode.
8123 # FIXME: add a self.ignore_ipolicy option
8124 ial = IAllocator(self.cfg, self.rpc,
8125 mode=constants.IALLOCATOR_MODE_RELOC,
8126 name=self.instance_name,
8127 relocate_from=[self.instance.primary_node],
8130 ial.Run(self.lu.op.iallocator)
8133 raise errors.OpPrereqError("Can't compute nodes using"
8134 " iallocator '%s': %s" %
8135 (self.lu.op.iallocator, ial.info),
8137 if len(ial.result) != ial.required_nodes:
8138 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8139 " of nodes (%s), required %s" %
8140 (self.lu.op.iallocator, len(ial.result),
8141 ial.required_nodes), errors.ECODE_FAULT)
8142 self.target_node = ial.result[0]
8143 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8144 self.instance_name, self.lu.op.iallocator,
8145 utils.CommaJoin(ial.result))
8147 def _WaitUntilSync(self):
8148 """Poll with custom rpc for disk sync.
8150 This uses our own step-based rpc call.
8153 self.feedback_fn("* wait until resync is done")
8157 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8159 (self.instance.disks,
8162 for node, nres in result.items():
8163 nres.Raise("Cannot resync disks on node %s" % node)
8164 node_done, node_percent = nres.payload
8165 all_done = all_done and node_done
8166 if node_percent is not None:
8167 min_percent = min(min_percent, node_percent)
8169 if min_percent < 100:
8170 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8173 def _EnsureSecondary(self, node):
8174 """Demote a node to secondary.
8177 self.feedback_fn("* switching node %s to secondary mode" % node)
8179 for dev in self.instance.disks:
8180 self.cfg.SetDiskID(dev, node)
8182 result = self.rpc.call_blockdev_close(node, self.instance.name,
8183 self.instance.disks)
8184 result.Raise("Cannot change disk to secondary on node %s" % node)
8186 def _GoStandalone(self):
8187 """Disconnect from the network.
8190 self.feedback_fn("* changing into standalone mode")
8191 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8192 self.instance.disks)
8193 for node, nres in result.items():
8194 nres.Raise("Cannot disconnect disks on node %s" % node)
8196 def _GoReconnect(self, multimaster):
8197 """Reconnect to the network.
8203 msg = "single-master"
8204 self.feedback_fn("* changing disks into %s mode" % msg)
8205 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8206 (self.instance.disks, self.instance),
8207 self.instance.name, multimaster)
8208 for node, nres in result.items():
8209 nres.Raise("Cannot change disks config on node %s" % node)
8211 def _ExecCleanup(self):
8212 """Try to cleanup after a failed migration.
8214 The cleanup is done by:
8215 - check that the instance is running only on one node
8216 (and update the config if needed)
8217 - change disks on its secondary node to secondary
8218 - wait until disks are fully synchronized
8219 - disconnect from the network
8220 - change disks into single-master mode
8221 - wait again until disks are fully synchronized
8224 instance = self.instance
8225 target_node = self.target_node
8226 source_node = self.source_node
8228 # check running on only one node
8229 self.feedback_fn("* checking where the instance actually runs"
8230 " (if this hangs, the hypervisor might be in"
8232 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8233 for node, result in ins_l.items():
8234 result.Raise("Can't contact node %s" % node)
8236 runningon_source = instance.name in ins_l[source_node].payload
8237 runningon_target = instance.name in ins_l[target_node].payload
8239 if runningon_source and runningon_target:
8240 raise errors.OpExecError("Instance seems to be running on two nodes,"
8241 " or the hypervisor is confused; you will have"
8242 " to ensure manually that it runs only on one"
8243 " and restart this operation")
8245 if not (runningon_source or runningon_target):
8246 raise errors.OpExecError("Instance does not seem to be running at all;"
8247 " in this case it's safer to repair by"
8248 " running 'gnt-instance stop' to ensure disk"
8249 " shutdown, and then restarting it")
8251 if runningon_target:
8252 # the migration has actually succeeded, we need to update the config
8253 self.feedback_fn("* instance running on secondary node (%s),"
8254 " updating config" % target_node)
8255 instance.primary_node = target_node
8256 self.cfg.Update(instance, self.feedback_fn)
8257 demoted_node = source_node
8259 self.feedback_fn("* instance confirmed to be running on its"
8260 " primary node (%s)" % source_node)
8261 demoted_node = target_node
8263 if instance.disk_template in constants.DTS_INT_MIRROR:
8264 self._EnsureSecondary(demoted_node)
8266 self._WaitUntilSync()
8267 except errors.OpExecError:
8268 # we ignore errors here, since if the device is standalone, it
8269 # won't be able to sync
8271 self._GoStandalone()
8272 self._GoReconnect(False)
8273 self._WaitUntilSync()
8275 self.feedback_fn("* done")
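# Illustrative sketch (not part of the original module): the case analysis
# _ExecCleanup performs after asking both nodes whether the instance is
# running there. It returns which node keeps the instance and which one is
# demoted to secondary; the two inconsistent cases abort the cleanup.
def _cleanup_decision_sketch(running_on_source, running_on_target):
  if running_on_source and running_on_target:
    raise RuntimeError("instance seems to be running on both nodes")
  if not (running_on_source or running_on_target):
    raise RuntimeError("instance does not seem to be running at all")
  if running_on_target:
    # the migration actually succeeded: the target becomes the primary
    return ("target", "source")
  # the migration never happened: the source stays the primary
  return ("source", "target")

assert _cleanup_decision_sketch(False, True) == ("target", "source")
assert _cleanup_decision_sketch(True, False) == ("source", "target")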
8277 def _RevertDiskStatus(self):
8278 """Try to revert the disk status after a failed migration.
8281 target_node = self.target_node
8282 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8286 self._EnsureSecondary(target_node)
8287 self._GoStandalone()
8288 self._GoReconnect(False)
8289 self._WaitUntilSync()
8290 except errors.OpExecError, err:
8291 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8292 " please try to recover the instance manually;"
8293 " error '%s'" % str(err))
8295 def _AbortMigration(self):
8296 """Call the hypervisor code to abort a started migration.
8299 instance = self.instance
8300 target_node = self.target_node
8301 source_node = self.source_node
8302 migration_info = self.migration_info
8304 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8308 abort_msg = abort_result.fail_msg
8310 logging.error("Aborting migration failed on target node %s: %s",
8311 target_node, abort_msg)
8312 # Don't raise an exception here, as we still have to try to revert the
8313 # disk status, even if this step failed.
8315 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8316 instance, False, self.live)
8317 abort_msg = abort_result.fail_msg
8319 logging.error("Aborting migration failed on source node %s: %s",
8320 source_node, abort_msg)
8322 def _ExecMigration(self):
8323 """Migrate an instance.
8325 The migration is done by:
8326 - change the disks into dual-master mode
8327 - wait until disks are fully synchronized again
8328 - migrate the instance
8329 - change disks on the new secondary node (the old primary) to secondary
8330 - wait until disks are fully synchronized
8331 - change disks into single-master mode
8334 instance = self.instance
8335 target_node = self.target_node
8336 source_node = self.source_node
8338 # Check for hypervisor version mismatch and warn the user.
8339 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8340 None, [self.instance.hypervisor])
8341 for ninfo in nodeinfo.values():
8342 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8344 (_, _, (src_info, )) = nodeinfo[source_node].payload
8345 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8347 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8348 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8349 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8350 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8351 if src_version != dst_version:
8352 self.feedback_fn("* warning: hypervisor version mismatch between"
8353 " source (%s) and target (%s) node" %
8354 (src_version, dst_version))
8356 self.feedback_fn("* checking disk consistency between source and target")
8357 for (idx, dev) in enumerate(instance.disks):
8358 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8359 raise errors.OpExecError("Disk %s is degraded or not fully"
8360 " synchronized on target node,"
8361 " aborting migration" % idx)
8363 if self.current_mem > self.tgt_free_mem:
8364 if not self.allow_runtime_changes:
8365 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8366 " free memory to fit instance %s on target"
8367 " node %s (have %dMB, need %dMB)" %
8368 (instance.name, target_node,
8369 self.tgt_free_mem, self.current_mem))
8370 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8371 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8374 rpcres.Raise("Cannot modify instance runtime memory")
8376 # First get the migration information from the remote node
8377 result = self.rpc.call_migration_info(source_node, instance)
8378 msg = result.fail_msg
8380 log_err = ("Failed fetching source migration information from %s: %s" %
8382 logging.error(log_err)
8383 raise errors.OpExecError(log_err)
8385 self.migration_info = migration_info = result.payload
8387 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8388 # Then switch the disks to master/master mode
8389 self._EnsureSecondary(target_node)
8390 self._GoStandalone()
8391 self._GoReconnect(True)
8392 self._WaitUntilSync()
8394 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8395 result = self.rpc.call_accept_instance(target_node,
8398 self.nodes_ip[target_node])
8400 msg = result.fail_msg
8402 logging.error("Instance pre-migration failed, trying to revert"
8403 " disk status: %s", msg)
8404 self.feedback_fn("Pre-migration failed, aborting")
8405 self._AbortMigration()
8406 self._RevertDiskStatus()
8407 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8408 (instance.name, msg))
8410 self.feedback_fn("* migrating instance to %s" % target_node)
8411 result = self.rpc.call_instance_migrate(source_node, instance,
8412 self.nodes_ip[target_node],
8414 msg = result.fail_msg
8416 logging.error("Instance migration failed, trying to revert"
8417 " disk status: %s", msg)
8418 self.feedback_fn("Migration failed, aborting")
8419 self._AbortMigration()
8420 self._RevertDiskStatus()
8421 raise errors.OpExecError("Could not migrate instance %s: %s" %
8422 (instance.name, msg))
8424 self.feedback_fn("* starting memory transfer")
8425 last_feedback = time.time()
8427 result = self.rpc.call_instance_get_migration_status(source_node,
8429 msg = result.fail_msg
8430 ms = result.payload # MigrationStatus instance
8431 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8432 logging.error("Instance migration failed, trying to revert"
8433 " disk status: %s", msg)
8434 self.feedback_fn("Migration failed, aborting")
8435 self._AbortMigration()
8436 self._RevertDiskStatus()
8437 raise errors.OpExecError("Could not migrate instance %s: %s" %
8438 (instance.name, msg))
8440 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8441 self.feedback_fn("* memory transfer complete")
8444 if (utils.TimeoutExpired(last_feedback,
8445 self._MIGRATION_FEEDBACK_INTERVAL) and
8446 ms.transferred_ram is not None):
8447 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8448 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8449 last_feedback = time.time()
8451 time.sleep(self._MIGRATION_POLL_INTERVAL)
8453 result = self.rpc.call_instance_finalize_migration_src(source_node,
8457 msg = result.fail_msg
8459 logging.error("Instance migration succeeded, but finalization failed"
8460 " on the source node: %s", msg)
8461 raise errors.OpExecError("Could not finalize instance migration: %s" %
8464 instance.primary_node = target_node
8466 # distribute new instance config to the other nodes
8467 self.cfg.Update(instance, self.feedback_fn)
8469 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8473 msg = result.fail_msg
8475 logging.error("Instance migration succeeded, but finalization failed"
8476 " on the target node: %s", msg)
8477 raise errors.OpExecError("Could not finalize instance migration: %s" %
8480 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8481 self._EnsureSecondary(source_node)
8482 self._WaitUntilSync()
8483 self._GoStandalone()
8484 self._GoReconnect(False)
8485 self._WaitUntilSync()
8487 # If the instance's disk template is `rbd' and there was a successful
8488 # migration, unmap the device from the source node.
8489 if self.instance.disk_template == constants.DT_RBD:
8490 disks = _ExpandCheckDisks(instance, instance.disks)
8491 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8493 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8494 msg = result.fail_msg
8496 logging.error("Migration was successful, but couldn't unmap the"
8497 " block device %s on source node %s: %s",
8498 disk.iv_name, source_node, msg)
8499 logging.error("You need to unmap the device %s manually on %s",
8500 disk.iv_name, source_node)
8502 self.feedback_fn("* done")
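# Illustrative sketch (not part of the original module): the poll-often,
# report-rarely pattern used by the memory transfer loop above. `get_status`
# and `feedback` are hypothetical callables standing in for the migration
# status RPC and feedback_fn; the intervals mirror the tasklet's
# _MIGRATION_POLL_INTERVAL and _MIGRATION_FEEDBACK_INTERVAL attributes.
import time

def _poll_with_feedback_sketch(get_status, feedback,
                               poll_interval=1, feedback_interval=10):
  last_feedback = time.time()
  while True:
    status = get_status()
    if status["state"] != "active":  # transfer finished or failed
      return status
    now = time.time()
    if now - last_feedback >= feedback_interval and status.get("percent"):
      feedback("* memory transfer progress: %.2f %%" % status["percent"])
      last_feedback = now
    time.sleep(poll_interval)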
8504 def _ExecFailover(self):
8505 """Failover an instance.
8507 The failover is done by shutting it down on its present node and
8508 starting it on the secondary.
8511 instance = self.instance
8512 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8514 source_node = instance.primary_node
8515 target_node = self.target_node
8517 if instance.admin_state == constants.ADMINST_UP:
8518 self.feedback_fn("* checking disk consistency between source and target")
8519 for (idx, dev) in enumerate(instance.disks):
8520 # for drbd, these are drbd over lvm
8521 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8523 if primary_node.offline:
8524 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8526 (primary_node.name, idx, target_node))
8527 elif not self.ignore_consistency:
8528 raise errors.OpExecError("Disk %s is degraded on target node,"
8529 " aborting failover" % idx)
8531 self.feedback_fn("* not checking disk consistency as instance is not"
8534 self.feedback_fn("* shutting down instance on source node")
8535 logging.info("Shutting down instance %s on node %s",
8536 instance.name, source_node)
8538 result = self.rpc.call_instance_shutdown(source_node, instance,
8539 self.shutdown_timeout)
8540 msg = result.fail_msg
8542 if self.ignore_consistency or primary_node.offline:
8543 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8544 " proceeding anyway; please make sure node"
8545 " %s is down; error details: %s",
8546 instance.name, source_node, source_node, msg)
8548 raise errors.OpExecError("Could not shutdown instance %s on"
8550 (instance.name, source_node, msg))
8552 self.feedback_fn("* deactivating the instance's disks on source node")
8553 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8554 raise errors.OpExecError("Can't shut down the instance's disks")
8556 instance.primary_node = target_node
8557 # distribute new instance config to the other nodes
8558 self.cfg.Update(instance, self.feedback_fn)
8560 # Only start the instance if it's marked as up
8561 if instance.admin_state == constants.ADMINST_UP:
8562 self.feedback_fn("* activating the instance's disks on target node %s" %
8564 logging.info("Starting instance %s on node %s",
8565 instance.name, target_node)
8567 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8568 ignore_secondaries=True)
8570 _ShutdownInstanceDisks(self.lu, instance)
8571 raise errors.OpExecError("Can't activate the instance's disks")
8573 self.feedback_fn("* starting the instance on the target node %s" %
8575 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8577 msg = result.fail_msg
8579 _ShutdownInstanceDisks(self.lu, instance)
8580 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8581 (instance.name, target_node, msg))
8583 def Exec(self, feedback_fn):
8584 """Perform the migration.
8587 self.feedback_fn = feedback_fn
8588 self.source_node = self.instance.primary_node
8590 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8591 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8592 self.target_node = self.instance.secondary_nodes[0]
8593 # Otherwise self.target_node has been populated either
8594 # directly, or through an iallocator.
8596 self.all_nodes = [self.source_node, self.target_node]
8597 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8598 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8601 feedback_fn("Failover instance %s" % self.instance.name)
8602 self._ExecFailover()
8604 feedback_fn("Migrating instance %s" % self.instance.name)
8607 return self._ExecCleanup()
8609 return self._ExecMigration()
8612 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8614 """Wrapper around L{_CreateBlockDevInner}.
8616 This method annotates the root device first.
8619 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8620 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8624 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8626 """Create a tree of block devices on a given node.
8628 If this device type has to be created on secondaries, create it and
8631 If not, just recurse to children keeping the same 'force' value.
8633 @attention: The device has to be annotated already.
8635 @param lu: the lu on whose behalf we execute
8636 @param node: the node on which to create the device
8637 @type instance: L{objects.Instance}
8638 @param instance: the instance which owns the device
8639 @type device: L{objects.Disk}
8640 @param device: the device to create
8641 @type force_create: boolean
8642 @param force_create: whether to force creation of this device; this
8643 will be changed to True whenever we find a device which has
8644 the CreateOnSecondary() attribute
8645 @param info: the extra 'metadata' we should attach to the device
8646 (this will be represented as a LVM tag)
8647 @type force_open: boolean
8648 @param force_open: this parameter will be passed to the
8649 L{backend.BlockdevCreate} function where it specifies
8650 whether we run on primary or not, and it affects both
8651 the child assembly and the device's own Open() execution
8654 if device.CreateOnSecondary():
8658 for child in device.children:
8659 _CreateBlockDevInner(lu, node, instance, child, force_create,
8662 if not force_create:
8665 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8668 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8669 """Create a single block device on a given node.
8671 This will not recurse over children of the device, so they must be
8674 @param lu: the lu on whose behalf we execute
8675 @param node: the node on which to create the device
8676 @type instance: L{objects.Instance}
8677 @param instance: the instance which owns the device
8678 @type device: L{objects.Disk}
8679 @param device: the device to create
8680 @param info: the extra 'metadata' we should attach to the device
8681 (this will be represented as a LVM tag)
8682 @type force_open: boolean
8683 @param force_open: this parameter will be passed to the
8684 L{backend.BlockdevCreate} function where it specifies
8685 whether we run on primary or not, and it affects both
8686 the child assembly and the device's own Open() execution
8689 lu.cfg.SetDiskID(device, node)
8690 result = lu.rpc.call_blockdev_create(node, device, device.size,
8691 instance.name, force_open, info)
8692 result.Raise("Can't create block device %s on"
8693 " node %s for instance %s" % (device, node, instance.name))
8694 if device.physical_id is None:
8695 device.physical_id = result.payload
8698 def _GenerateUniqueNames(lu, exts):
8699 """Generate a suitable LV name.
8701 This will generate logical volume names (one per given extension) for the given instance.
8706 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8707 results.append("%s%s" % (new_id, val))
8711 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8712 iv_name, p_minor, s_minor):
8713 """Generate a drbd8 device complete with its children.
8716 assert len(vgnames) == len(names) == 2
8717 port = lu.cfg.AllocatePort()
8718 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8720 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8721 logical_id=(vgnames[0], names[0]),
8723 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8724 logical_id=(vgnames[1], names[1]),
8726 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8727 logical_id=(primary, secondary, port,
8730 children=[dev_data, dev_meta],
8731 iv_name=iv_name, params={})
8735 _DISK_TEMPLATE_NAME_PREFIX = {
8736 constants.DT_PLAIN: "",
8737 constants.DT_RBD: ".rbd",
8741 _DISK_TEMPLATE_DEVICE_TYPE = {
8742 constants.DT_PLAIN: constants.LD_LV,
8743 constants.DT_FILE: constants.LD_FILE,
8744 constants.DT_SHARED_FILE: constants.LD_FILE,
8745 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8746 constants.DT_RBD: constants.LD_RBD,
8750 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8751 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8752 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8753 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8754 """Generate the entire disk layout for a given template type.
8757 #TODO: compute space requirements
8759 vgname = lu.cfg.GetVGName()
8760 disk_count = len(disk_info)
8763 if template_name == constants.DT_DISKLESS:
8765 elif template_name == constants.DT_DRBD8:
8766 if len(secondary_nodes) != 1:
8767 raise errors.ProgrammerError("Wrong template configuration")
8768 remote_node = secondary_nodes[0]
8769 minors = lu.cfg.AllocateDRBDMinor(
8770 [primary_node, remote_node] * len(disk_info), instance_name)
8772 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8774 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8777 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8778 for i in range(disk_count)]):
8779 names.append(lv_prefix + "_data")
8780 names.append(lv_prefix + "_meta")
8781 for idx, disk in enumerate(disk_info):
8782 disk_index = idx + base_index
8783 data_vg = disk.get(constants.IDISK_VG, vgname)
8784 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8785 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8786 disk[constants.IDISK_SIZE],
8788 names[idx * 2:idx * 2 + 2],
8789 "disk/%d" % disk_index,
8790 minors[idx * 2], minors[idx * 2 + 1])
8791 disk_dev.mode = disk[constants.IDISK_MODE]
8792 disks.append(disk_dev)
8795 raise errors.ProgrammerError("Wrong template configuration")
8797 if template_name == constants.DT_FILE:
8799 elif template_name == constants.DT_SHARED_FILE:
8800 _req_shr_file_storage()
8802 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8803 if name_prefix is None:
8806 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8807 (name_prefix, base_index + i)
8808 for i in range(disk_count)])
8810 if template_name == constants.DT_PLAIN:
8811 def logical_id_fn(idx, _, disk):
8812 vg = disk.get(constants.IDISK_VG, vgname)
8813 return (vg, names[idx])
8814 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8816 lambda _, disk_index, disk: (file_driver,
8817 "%s/disk%d" % (file_storage_dir,
8819 elif template_name == constants.DT_BLOCK:
8821 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8822 disk[constants.IDISK_ADOPT])
8823 elif template_name == constants.DT_RBD:
8824 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8826 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8828 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8830 for idx, disk in enumerate(disk_info):
8831 disk_index = idx + base_index
8832 size = disk[constants.IDISK_SIZE]
8833 feedback_fn("* disk %s, size %s" %
8834 (disk_index, utils.FormatUnit(size, "h")))
8835 disks.append(objects.Disk(dev_type=dev_type, size=size,
8836 logical_id=logical_id_fn(idx, disk_index, disk),
8837 iv_name="disk/%d" % disk_index,
8838 mode=disk[constants.IDISK_MODE],
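# Illustrative sketch (not part of the original module): how the DRBD8 branch
# above pairs generated LV names and allocated minors per disk. For disk
# index idx the data/meta LVs are names[2 * idx] / names[2 * idx + 1] and the
# primary/secondary minors are minors[2 * idx] / minors[2 * idx + 1]. The
# example names below are made up.
def _drbd_pairs_sketch(names, minors):
  assert len(names) == len(minors)
  return [((names[2 * i], names[2 * i + 1]),    # (data LV, meta LV)
           (minors[2 * i], minors[2 * i + 1]))  # (primary, secondary minor)
          for i in range(len(names) // 2)]

pairs = _drbd_pairs_sketch(["uuid0.disk0_data", "uuid0.disk0_meta",
                            "uuid1.disk1_data", "uuid1.disk1_meta"],
                           [0, 1, 2, 3])
assert pairs[1] == (("uuid1.disk1_data", "uuid1.disk1_meta"), (2, 3))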
8844 def _GetInstanceInfoText(instance):
8845 """Compute that text that should be added to the disk's metadata.
8848 return "originstname+%s" % instance.name
8851 def _CalcEta(time_taken, written, total_size):
8852 """Calculates the ETA based on size written and total size.
8854 @param time_taken: The time taken so far
8855 @param written: amount written so far
8856 @param total_size: The total size of data to be written
8857 @return: The remaining time in seconds
8860 avg_time = time_taken / float(written)
8861 return (total_size - written) * avg_time
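# Illustrative sketch (not part of the original module): the ETA arithmetic
# of _CalcEta with hypothetical numbers. If 30 seconds were spent writing
# 256 MiB out of 1024 MiB, the average is 30 / 256 seconds per MiB, so the
# remaining 768 MiB need about 90 seconds.
def _calc_eta_sketch(time_taken, written, total_size):
  avg_time = time_taken / float(written)    # seconds per unit written so far
  return (total_size - written) * avg_time  # seconds left at the same rate

assert abs(_calc_eta_sketch(30.0, 256, 1024) - 90.0) < 1e-9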
8864 def _WipeDisks(lu, instance):
8865 """Wipes instance disks.
8867 @type lu: L{LogicalUnit}
8868 @param lu: the logical unit on whose behalf we execute
8869 @type instance: L{objects.Instance}
8870 @param instance: the instance whose disks we should wipe
8871 @return: the success of the wipe
8874 node = instance.primary_node
8876 for device in instance.disks:
8877 lu.cfg.SetDiskID(device, node)
8879 logging.info("Pause sync of instance %s disks", instance.name)
8880 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8881 (instance.disks, instance),
8884 for idx, success in enumerate(result.payload):
8886 logging.warn("pause-sync of instance %s for disk %d failed",
8890 for idx, device in enumerate(instance.disks):
8891 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
8892 # most MAX_WIPE_CHUNK (see the standalone sketch after this function)
8893 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8894 constants.MIN_WIPE_CHUNK_PERCENT)
8895 # we _must_ make this an int, otherwise rounding errors will
8897 wipe_chunk_size = int(wipe_chunk_size)
8899 lu.LogInfo("* Wiping disk %d", idx)
8900 logging.info("Wiping disk %d for instance %s, node %s using"
8901 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8906 start_time = time.time()
8908 while offset < size:
8909 wipe_size = min(wipe_chunk_size, size - offset)
8910 logging.debug("Wiping disk %d, offset %s, chunk %s",
8911 idx, offset, wipe_size)
8912 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8914 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8915 (idx, offset, wipe_size))
8918 if now - last_output >= 60:
8919 eta = _CalcEta(now - start_time, offset, size)
8920 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8921 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8924 logging.info("Resume sync of instance %s disks", instance.name)
8926 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8927 (instance.disks, instance),
8930 for idx, success in enumerate(result.payload):
8932 lu.LogWarning("Resume sync of disk %d failed, please have a"
8933 " look at the status and troubleshoot the issue", idx)
8934 logging.warn("resume-sync of instance %s for disk %d failed",
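# Illustrative sketch (not part of the original module): how _WipeDisks picks
# its chunk size and walks the disk. The constant values below are assumed
# for the example only; the real ones live in ganeti.constants.
MAX_WIPE_CHUNK_SKETCH = 1024        # assumed cap, in MiB, per wipe call
MIN_WIPE_CHUNK_PERCENT_SKETCH = 10  # assumed minimum share of the disk

def _wipe_plan_sketch(disk_size):
  """Return the (offset, size) chunks a wipe of disk_size MiB would use."""
  chunk = int(min(MAX_WIPE_CHUNK_SKETCH,
                  disk_size / 100.0 * MIN_WIPE_CHUNK_PERCENT_SKETCH))
  offset = 0
  chunks = []
  while offset < disk_size:
    wipe_size = min(chunk, disk_size - offset)
    chunks.append((offset, wipe_size))
    offset += wipe_size
  return chunks

# A 2500 MiB disk is wiped in ten 250 MiB steps (10% of the disk, below cap).
assert _wipe_plan_sketch(2500) == [(i * 250, 250) for i in range(10)]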
8938 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8939 """Create all disks for an instance.
8941 This abstracts away some work from AddInstance.
8943 @type lu: L{LogicalUnit}
8944 @param lu: the logical unit on whose behalf we execute
8945 @type instance: L{objects.Instance}
8946 @param instance: the instance whose disks we should create
8948 @param to_skip: list of indices to skip
8949 @type target_node: string
8950 @param target_node: if passed, overrides the target node for creation
8952 @return: the success of the creation
8955 info = _GetInstanceInfoText(instance)
8956 if target_node is None:
8957 pnode = instance.primary_node
8958 all_nodes = instance.all_nodes
8963 if instance.disk_template in constants.DTS_FILEBASED:
8964 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8965 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8967 result.Raise("Failed to create directory '%s' on"
8968 " node %s" % (file_storage_dir, pnode))
8970 # Note: this needs to be kept in sync with adding of disks in
8971 # LUInstanceSetParams
8972 for idx, device in enumerate(instance.disks):
8973 if to_skip and idx in to_skip:
8975 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8977 for node in all_nodes:
8978 f_create = node == pnode
8979 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8982 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8983 """Remove all disks for an instance.
8985 This abstracts away some work from `AddInstance()` and
8986 `RemoveInstance()`. Note that in case some of the devices couldn't
8987 be removed, the removal will continue with the other ones (compare
8988 with `_CreateDisks()`).
8990 @type lu: L{LogicalUnit}
8991 @param lu: the logical unit on whose behalf we execute
8992 @type instance: L{objects.Instance}
8993 @param instance: the instance whose disks we should remove
8994 @type target_node: string
8995 @param target_node: used to override the node on which to remove the disks
8997 @return: the success of the removal
9000 logging.info("Removing block devices for instance %s", instance.name)
9003 ports_to_release = set()
9004 for (idx, device) in enumerate(instance.disks):
9006 edata = [(target_node, device)]
9008 edata = device.ComputeNodeTree(instance.primary_node)
9009 for node, disk in edata:
9010 lu.cfg.SetDiskID(disk, node)
9011 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9013 lu.LogWarning("Could not remove disk %s on node %s,"
9014 " continuing anyway: %s", idx, node, msg)
9017 # if this is a DRBD disk, return its port to the pool
9018 if device.dev_type in constants.LDS_DRBD:
9019 ports_to_release.add(device.logical_id[2])
9021 if all_result or ignore_failures:
9022 for port in ports_to_release:
9023 lu.cfg.AddTcpUdpPort(port)
9025 if instance.disk_template == constants.DT_FILE:
9026 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9030 tgt = instance.primary_node
9031 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9033 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9034 file_storage_dir, instance.primary_node, result.fail_msg)
9040 def _ComputeDiskSizePerVG(disk_template, disks):
9041 """Compute disk size requirements in the volume group
9044 def _compute(disks, payload):
9045 """Universal algorithm.
9050 vgs[disk[constants.IDISK_VG]] = \
9051 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9055 # Required free disk space as a function of disk and swap space
9057 constants.DT_DISKLESS: {},
9058 constants.DT_PLAIN: _compute(disks, 0),
9059 # 128 MB are added for drbd metadata for each disk
9060 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9061 constants.DT_FILE: {},
9062 constants.DT_SHARED_FILE: {},
9065 if disk_template not in req_size_dict:
9066 raise errors.ProgrammerError("Disk template '%s' size requirement"
9067 " is unknown" % disk_template)
9069 return req_size_dict[disk_template]
9072 def _ComputeDiskSize(disk_template, disks):
9073 """Compute disk size requirements in the volume group
9076 # Required free disk space as a function of disk and swap space
9078 constants.DT_DISKLESS: None,
9079 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9080 # 128 MB are added for drbd metadata for each disk
9082 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9083 constants.DT_FILE: None,
9084 constants.DT_SHARED_FILE: 0,
9085 constants.DT_BLOCK: 0,
9086 constants.DT_RBD: 0,
9089 if disk_template not in req_size_dict:
9090 raise errors.ProgrammerError("Disk template '%s' size requirement"
9091 " is unknown" % disk_template)
9093 return req_size_dict[disk_template]
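# Illustrative sketch (not part of the original module): the size arithmetic
# of _ComputeDiskSize for the two LVM-backed templates. The 128 MiB per-disk
# metadata overhead matches the "128 MB are added for drbd metadata" comment
# above (DRBD_META_SIZE).
def _required_space_sketch(template, disk_sizes):
  if template == "plain":
    return sum(disk_sizes)
  if template == "drbd":
    return sum(size + 128 for size in disk_sizes)
  return None  # diskless/file/rbd templates need no volume group space here

# Two disks of 10 GiB and 2 GiB (sizes in MiB):
assert _required_space_sketch("plain", [10240, 2048]) == 12288
assert _required_space_sketch("drbd", [10240, 2048]) == 12288 + 2 * 128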
9096 def _FilterVmNodes(lu, nodenames):
9097 """Filters out non-vm_capable nodes from a list.
9099 @type lu: L{LogicalUnit}
9100 @param lu: the logical unit for which we check
9101 @type nodenames: list
9102 @param nodenames: the list of nodes on which we should check
9104 @return: the list of vm-capable nodes
9107 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9108 return [name for name in nodenames if name not in vm_nodes]
9111 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9112 """Hypervisor parameter validation.
9114 This function abstracts the hypervisor parameter validation to be
9115 used in both instance create and instance modify.
9117 @type lu: L{LogicalUnit}
9118 @param lu: the logical unit for which we check
9119 @type nodenames: list
9120 @param nodenames: the list of nodes on which we should check
9121 @type hvname: string
9122 @param hvname: the name of the hypervisor we should use
9123 @type hvparams: dict
9124 @param hvparams: the parameters which we need to check
9125 @raise errors.OpPrereqError: if the parameters are not valid
9128 nodenames = _FilterVmNodes(lu, nodenames)
9130 cluster = lu.cfg.GetClusterInfo()
9131 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9133 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9134 for node in nodenames:
9138 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9141 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9142 """OS parameters validation.
9144 @type lu: L{LogicalUnit}
9145 @param lu: the logical unit for which we check
9146 @type required: boolean
9147 @param required: whether the validation should fail if the OS is not
9149 @type nodenames: list
9150 @param nodenames: the list of nodes on which we should check
9151 @type osname: string
9152 @param osname: the name of the OS we should use
9153 @type osparams: dict
9154 @param osparams: the parameters which we need to check
9155 @raise errors.OpPrereqError: if the parameters are not valid
9158 nodenames = _FilterVmNodes(lu, nodenames)
9159 result = lu.rpc.call_os_validate(nodenames, required, osname,
9160 [constants.OS_VALIDATE_PARAMETERS],
9162 for node, nres in result.items():
9163 # we don't check for offline cases since this should be run only
9164 # against the master node and/or an instance's nodes
9165 nres.Raise("OS Parameters validation failed on node %s" % node)
9166 if not nres.payload:
9167 lu.LogInfo("OS %s not found on node %s, validation skipped",
9171 class LUInstanceCreate(LogicalUnit):
9172 """Create an instance.
9175 HPATH = "instance-add"
9176 HTYPE = constants.HTYPE_INSTANCE
9179 def CheckArguments(self):
9183 # do not require name_check to ease forward/backward compatibility
9185 if self.op.no_install and self.op.start:
9186 self.LogInfo("No-installation mode selected, disabling startup")
9187 self.op.start = False
9188 # validate/normalize the instance name
9189 self.op.instance_name = \
9190 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9192 if self.op.ip_check and not self.op.name_check:
9193 # TODO: make the ip check more flexible and not depend on the name check
9194 raise errors.OpPrereqError("Cannot do IP address check without a name"
9195 " check", errors.ECODE_INVAL)
9197 # check nics' parameter names
9198 for nic in self.op.nics:
9199 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9201 # check disks' parameter names and consistent adopt/no-adopt strategy
9202 has_adopt = has_no_adopt = False
9203 for disk in self.op.disks:
9204 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9205 if constants.IDISK_ADOPT in disk:
9209 if has_adopt and has_no_adopt:
9210 raise errors.OpPrereqError("Either all disks are adopted or none is",
9213 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9214 raise errors.OpPrereqError("Disk adoption is not supported for the"
9215 " '%s' disk template" %
9216 self.op.disk_template,
9218 if self.op.iallocator is not None:
9219 raise errors.OpPrereqError("Disk adoption not allowed with an"
9220 " iallocator script", errors.ECODE_INVAL)
9221 if self.op.mode == constants.INSTANCE_IMPORT:
9222 raise errors.OpPrereqError("Disk adoption not allowed for"
9223 " instance import", errors.ECODE_INVAL)
9225 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9226 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9227 " but no 'adopt' parameter given" %
9228 self.op.disk_template,
9231 self.adopt_disks = has_adopt
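# Illustrative sketch (not part of the original module): the all-or-none
# adoption rule enforced above. Disks are plain dicts; the "adopt" key stands
# in for constants.IDISK_ADOPT.
def _check_adoption_sketch(disks):
  has_adopt = any("adopt" in d for d in disks)
  has_no_adopt = any("adopt" not in d for d in disks)
  if has_adopt and has_no_adopt:
    raise ValueError("Either all disks are adopted or none is")
  return has_adopt

assert _check_adoption_sketch([{"adopt": "xenvg/vol1"}, {"adopt": "xenvg/vol2"}])
assert not _check_adoption_sketch([{"size": 1024}, {"size": 2048}])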
9233 # instance name verification
9234 if self.op.name_check:
9235 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9236 self.op.instance_name = self.hostname1.name
9237 # used in CheckPrereq for ip ping check
9238 self.check_ip = self.hostname1.ip
9240 self.check_ip = None
9242 # file storage checks
9243 if (self.op.file_driver and
9244 self.op.file_driver not in constants.FILE_DRIVER):
9245 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9246 self.op.file_driver, errors.ECODE_INVAL)
9248 if self.op.disk_template == constants.DT_FILE:
9249 opcodes.RequireFileStorage()
9250 elif self.op.disk_template == constants.DT_SHARED_FILE:
9251 opcodes.RequireSharedFileStorage()
9253 ### Node/iallocator related checks
9254 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9256 if self.op.pnode is not None:
9257 if self.op.disk_template in constants.DTS_INT_MIRROR:
9258 if self.op.snode is None:
9259 raise errors.OpPrereqError("The networked disk templates need"
9260 " a mirror node", errors.ECODE_INVAL)
9262 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9264 self.op.snode = None
9266 self._cds = _GetClusterDomainSecret()
9268 if self.op.mode == constants.INSTANCE_IMPORT:
9269 # On import force_variant must be True, because if we forced it at
9270 # initial install, our only chance when importing it back is that it
9272 self.op.force_variant = True
9274 if self.op.no_install:
9275 self.LogInfo("No-installation mode has no effect during import")
9277 elif self.op.mode == constants.INSTANCE_CREATE:
9278 if self.op.os_type is None:
9279 raise errors.OpPrereqError("No guest OS specified",
9281 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9282 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9283 " installation" % self.op.os_type,
9285 if self.op.disk_template is None:
9286 raise errors.OpPrereqError("No disk template specified",
9289 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9290 # Check handshake to ensure both clusters have the same domain secret
9291 src_handshake = self.op.source_handshake
9292 if not src_handshake:
9293 raise errors.OpPrereqError("Missing source handshake",
9296 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9299 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9302 # Load and check source CA
9303 self.source_x509_ca_pem = self.op.source_x509_ca
9304 if not self.source_x509_ca_pem:
9305 raise errors.OpPrereqError("Missing source X509 CA",
9309 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9311 except OpenSSL.crypto.Error, err:
9312 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9313 (err, ), errors.ECODE_INVAL)
9315 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9316 if errcode is not None:
9317 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9320 self.source_x509_ca = cert
9322 src_instance_name = self.op.source_instance_name
9323 if not src_instance_name:
9324 raise errors.OpPrereqError("Missing source instance name",
9327 self.source_instance_name = \
9328 netutils.GetHostname(name=src_instance_name).name
9331 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9332 self.op.mode, errors.ECODE_INVAL)
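# Summary of the creation modes validated above:
#   INSTANCE_CREATE        - needs a guest OS type and a disk template
#   INSTANCE_IMPORT        - import from a local export; force_variant is
#                            implied and no_install has no effect
#   INSTANCE_REMOTE_IMPORT - inter-cluster import; requires a valid
#                            handshake and source X509 CA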
9334 def ExpandNames(self):
9335 """ExpandNames for CreateInstance.
9337 Figure out the right locks for instance creation.
9340 self.needed_locks = {}
9342 instance_name = self.op.instance_name
9343 # this is just a preventive check, but someone might still add this
9344 # instance in the meantime, and creation will fail at lock-add time
9345 if instance_name in self.cfg.GetInstanceList():
9346 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9347 instance_name, errors.ECODE_EXISTS)
9349 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9351 if self.op.iallocator:
9352 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9353 # specifying a group on instance creation and then selecting nodes from
9354 # that group
9355 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9356 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9358 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9359 nodelist = [self.op.pnode]
9360 if self.op.snode is not None:
9361 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9362 nodelist.append(self.op.snode)
9363 self.needed_locks[locking.LEVEL_NODE] = nodelist
9364 # Lock resources of instance's primary and secondary nodes (copy to
9365 # prevent accidental modification)
9366 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9368 # in case of import lock the source node too
9369 if self.op.mode == constants.INSTANCE_IMPORT:
9370 src_node = self.op.src_node
9371 src_path = self.op.src_path
9373 if src_path is None:
9374 self.op.src_path = src_path = self.op.instance_name
9376 if src_node is None:
9377 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9378 self.op.src_node = None
9379 if os.path.isabs(src_path):
9380 raise errors.OpPrereqError("Importing an instance from a path"
9381 " requires a source node option",
9384 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9385 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9386 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9387 if not os.path.isabs(src_path):
9388 self.op.src_path = src_path = \
9389 utils.PathJoin(constants.EXPORT_DIR, src_path)
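# Locking strategy: with an iallocator the target nodes are not known yet,
# so all node (and node resource) locks are taken; otherwise only the
# primary/secondary nodes are locked, plus the source node for imports
# (or, when no source node was given, all nodes).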
9391 def _RunAllocator(self):
9392 """Run the allocator based on input opcode.
9395 nics = [n.ToDict() for n in self.nics]
9396 ial = IAllocator(self.cfg, self.rpc,
9397 mode=constants.IALLOCATOR_MODE_ALLOC,
9398 name=self.op.instance_name,
9399 disk_template=self.op.disk_template,
9402 vcpus=self.be_full[constants.BE_VCPUS],
9403 memory=self.be_full[constants.BE_MAXMEM],
9404 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9407 hypervisor=self.op.hypervisor,
9410 ial.Run(self.op.iallocator)
9412 if not ial.success:
9413 raise errors.OpPrereqError("Can't compute nodes using"
9414 " iallocator '%s': %s" %
9415 (self.op.iallocator, ial.info),
9417 if len(ial.result) != ial.required_nodes:
9418 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9419 " of nodes (%s), required %s" %
9420 (self.op.iallocator, len(ial.result),
9421 ial.required_nodes), errors.ECODE_FAULT)
9422 self.op.pnode = ial.result[0]
9423 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9424 self.op.instance_name, self.op.iallocator,
9425 utils.CommaJoin(ial.result))
9426 if ial.required_nodes == 2:
9427 self.op.snode = ial.result[1]
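# ial.result is a list of node names chosen by the allocator: the first
# entry becomes the primary node and, when two nodes are required (as for
# the mirrored disk templates), the second becomes the secondary.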
9429 def BuildHooksEnv(self):
9432 This runs on master, primary and secondary nodes of the instance.
9436 "ADD_MODE": self.op.mode,
9438 if self.op.mode == constants.INSTANCE_IMPORT:
9439 env["SRC_NODE"] = self.op.src_node
9440 env["SRC_PATH"] = self.op.src_path
9441 env["SRC_IMAGES"] = self.src_images
9443 env.update(_BuildInstanceHookEnv(
9444 name=self.op.instance_name,
9445 primary_node=self.op.pnode,
9446 secondary_nodes=self.secondaries,
9447 status=self.op.start,
9448 os_type=self.op.os_type,
9449 minmem=self.be_full[constants.BE_MINMEM],
9450 maxmem=self.be_full[constants.BE_MAXMEM],
9451 vcpus=self.be_full[constants.BE_VCPUS],
9452 nics=_NICListToTuple(self, self.nics),
9453 disk_template=self.op.disk_template,
9454 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9455 for d in self.disks],
9458 hypervisor_name=self.op.hypervisor,
9464 def BuildHooksNodes(self):
9465 """Build hooks nodes.
9468 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9469 return nl, nl
9471 def _ReadExportInfo(self):
9472 """Reads the export information from disk.
9474 It will override the opcode source node and path with the actual
9475 information, if these two were not specified before.
9477 @return: the export information
9480 assert self.op.mode == constants.INSTANCE_IMPORT
9482 src_node = self.op.src_node
9483 src_path = self.op.src_path
9485 if src_node is None:
9486 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9487 exp_list = self.rpc.call_export_list(locked_nodes)
9489 for node in exp_list:
9490 if exp_list[node].fail_msg:
9492 if src_path in exp_list[node].payload:
9494 self.op.src_node = src_node = node
9495 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9499 raise errors.OpPrereqError("No export found for relative path %s" %
9500 src_path, errors.ECODE_INVAL)
9502 _CheckNodeOnline(self, src_node)
9503 result = self.rpc.call_export_info(src_node, src_path)
9504 result.Raise("No export or invalid export found in dir %s" % src_path)
9506 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9507 if not export_info.has_section(constants.INISECT_EXP):
9508 raise errors.ProgrammerError("Corrupted export config",
9509 errors.ECODE_ENVIRON)
9511 ei_version = export_info.get(constants.INISECT_EXP, "version")
9512 if (int(ei_version) != constants.EXPORT_VERSION):
9513 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9514 (ei_version, constants.EXPORT_VERSION),
9515 errors.ECODE_ENVIRON)
9517 return export_info
9518 def _ReadExportParams(self, einfo):
9519 """Use export parameters as defaults.
9521 In case the opcode doesn't specify (as in override) some instance
9522 parameters, then try to use them from the export information, if
9523 they declare them.
9526 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9528 if self.op.disk_template is None:
9529 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9530 self.op.disk_template = einfo.get(constants.INISECT_INS,
9532 if self.op.disk_template not in constants.DISK_TEMPLATES:
9533 raise errors.OpPrereqError("Disk template specified in configuration"
9534 " file is not one of the allowed values:"
9535 " %s" % " ".join(constants.DISK_TEMPLATES))
9537 raise errors.OpPrereqError("No disk template specified and the export"
9538 " is missing the disk_template information",
9541 if not self.op.disks:
9542 disks = []
9543 # TODO: import the disk iv_name too
9544 for idx in range(constants.MAX_DISKS):
9545 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9546 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9547 disks.append({constants.IDISK_SIZE: disk_sz})
9548 self.op.disks = disks
9549 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9550 raise errors.OpPrereqError("No disk info specified and the export"
9551 " is missing the disk information",
9554 if not self.op.nics:
9556 for idx in range(constants.MAX_NICS):
9557 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9559 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9560 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9567 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9568 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9570 if (self.op.hypervisor is None and
9571 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9572 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9574 if einfo.has_section(constants.INISECT_HYP):
9575 # use the export parameters but do not override the ones
9576 # specified by the user
9577 for name, value in einfo.items(constants.INISECT_HYP):
9578 if name not in self.op.hvparams:
9579 self.op.hvparams[name] = value
9581 if einfo.has_section(constants.INISECT_BEP):
9582 # use the parameters, without overriding
9583 for name, value in einfo.items(constants.INISECT_BEP):
9584 if name not in self.op.beparams:
9585 self.op.beparams[name] = value
9586 # Compatibility for the old "memory" be param
9587 if name == constants.BE_MEMORY:
9588 if constants.BE_MAXMEM not in self.op.beparams:
9589 self.op.beparams[constants.BE_MAXMEM] = value
9590 if constants.BE_MINMEM not in self.op.beparams:
9591 self.op.beparams[constants.BE_MINMEM] = value
9593 # try to read the parameters old style, from the main section
9594 for name in constants.BES_PARAMETERS:
9595 if (name not in self.op.beparams and
9596 einfo.has_option(constants.INISECT_INS, name)):
9597 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9599 if einfo.has_section(constants.INISECT_OSP):
9600 # use the parameters, without overriding
9601 for name, value in einfo.items(constants.INISECT_OSP):
9602 if name not in self.op.osparams:
9603 self.op.osparams[name] = value
9605 def _RevertToDefaults(self, cluster):
9606 """Revert the instance parameters to the default values.
9610 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9611 for name in self.op.hvparams.keys():
9612 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9613 del self.op.hvparams[name]
9615 be_defs = cluster.SimpleFillBE({})
9616 for name in self.op.beparams.keys():
9617 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9618 del self.op.beparams[name]
9620 nic_defs = cluster.SimpleFillNIC({})
9621 for nic in self.op.nics:
9622 for name in constants.NICS_PARAMETERS:
9623 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9624 del nic[name]
9626 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9627 for name in self.op.osparams.keys():
9628 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9629 del self.op.osparams[name]
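# Illustrative (hypothetical) example of the effect: if the cluster default
# for a hypervisor parameter is kernel_path=/boot/vmlinuz and the opcode
# passes the same value, the parameter is dropped from the opcode so the
# instance keeps tracking the cluster default instead of pinning it.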
9631 def _CalculateFileStorageDir(self):
9632 """Calculate final instance file storage dir.
9635 # file storage dir calculation/check
9636 self.instance_file_storage_dir = None
9637 if self.op.disk_template in constants.DTS_FILEBASED:
9638 # build the full file storage dir path
9639 joinargs = []
9641 if self.op.disk_template == constants.DT_SHARED_FILE:
9642 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9643 else:
9644 get_fsd_fn = self.cfg.GetFileStorageDir
9646 cfg_storagedir = get_fsd_fn()
9647 if not cfg_storagedir:
9648 raise errors.OpPrereqError("Cluster file storage dir not defined")
9649 joinargs.append(cfg_storagedir)
9651 if self.op.file_storage_dir is not None:
9652 joinargs.append(self.op.file_storage_dir)
9654 joinargs.append(self.op.instance_name)
9656 # pylint: disable=W0142
9657 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
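# The resulting path has the shape
#   <cluster file storage dir>[/<self.op.file_storage_dir>]/<instance name>
# e.g. (illustrative values only) /srv/ganeti/file-storage/web1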
9659 def CheckPrereq(self): # pylint: disable=R0914
9660 """Check prerequisites.
9663 self._CalculateFileStorageDir()
9665 if self.op.mode == constants.INSTANCE_IMPORT:
9666 export_info = self._ReadExportInfo()
9667 self._ReadExportParams(export_info)
9668 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9669 else:
9670 self._old_instance_name = None
9672 if (not self.cfg.GetVGName() and
9673 self.op.disk_template not in constants.DTS_NOT_LVM):
9674 raise errors.OpPrereqError("Cluster does not support lvm-based"
9675 " instances", errors.ECODE_STATE)
9677 if (self.op.hypervisor is None or
9678 self.op.hypervisor == constants.VALUE_AUTO):
9679 self.op.hypervisor = self.cfg.GetHypervisorType()
9681 cluster = self.cfg.GetClusterInfo()
9682 enabled_hvs = cluster.enabled_hypervisors
9683 if self.op.hypervisor not in enabled_hvs:
9684 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9685 " cluster (%s)" % (self.op.hypervisor,
9686 ",".join(enabled_hvs)),
9689 # Check tag validity
9690 for tag in self.op.tags:
9691 objects.TaggableObject.ValidateTag(tag)
9693 # check hypervisor parameter syntax (locally)
9694 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9695 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9696 self.op.hvparams)
9697 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9698 hv_type.CheckParameterSyntax(filled_hvp)
9699 self.hv_full = filled_hvp
9700 # check that we don't specify global parameters on an instance
9701 _CheckGlobalHvParams(self.op.hvparams)
9703 # fill and remember the beparams dict
9704 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9705 for param, value in self.op.beparams.iteritems():
9706 if value == constants.VALUE_AUTO:
9707 self.op.beparams[param] = default_beparams[param]
9708 objects.UpgradeBeParams(self.op.beparams)
9709 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9710 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9712 # build os parameters
9713 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9715 # now that hvp/bep are in final format, let's reset to defaults,
9716 # if told to do so
9717 if self.op.identify_defaults:
9718 self._RevertToDefaults(cluster)
9720 # NIC buildup
9721 self.nics = []
9722 for idx, nic in enumerate(self.op.nics):
9723 nic_mode_req = nic.get(constants.INIC_MODE, None)
9724 nic_mode = nic_mode_req
9725 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9726 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9728 # in routed mode, for the first nic, the default ip is 'auto'
9729 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9730 default_ip_mode = constants.VALUE_AUTO
9732 default_ip_mode = constants.VALUE_NONE
9734 # ip validity checks
9735 ip = nic.get(constants.INIC_IP, default_ip_mode)
9736 if ip is None or ip.lower() == constants.VALUE_NONE:
9737 nic_ip = None
9738 elif ip.lower() == constants.VALUE_AUTO:
9739 if not self.op.name_check:
9740 raise errors.OpPrereqError("IP address set to auto but name checks"
9741 " have been skipped",
9743 nic_ip = self.hostname1.ip
9745 if not netutils.IPAddress.IsValid(ip):
9746 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9750 # TODO: check the ip address for uniqueness
9751 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9752 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9755 # MAC address verification
9756 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9757 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9758 mac = utils.NormalizeAndValidateMac(mac)
9760 try:
9761 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9762 except errors.ReservationError:
9763 raise errors.OpPrereqError("MAC address %s already in use"
9764 " in cluster" % mac,
9765 errors.ECODE_NOTUNIQUE)
9767 # Build nic parameters
9768 link = nic.get(constants.INIC_LINK, None)
9769 if link == constants.VALUE_AUTO:
9770 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9773 nicparams[constants.NIC_MODE] = nic_mode
9775 nicparams[constants.NIC_LINK] = link
9777 check_params = cluster.SimpleFillNIC(nicparams)
9778 objects.NIC.CheckParameterSyntax(check_params)
9779 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9781 # disk checks/pre-build
9782 default_vg = self.cfg.GetVGName()
9783 self.disks = []
9784 for disk in self.op.disks:
9785 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9786 if mode not in constants.DISK_ACCESS_SET:
9787 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9788 mode, errors.ECODE_INVAL)
9789 size = disk.get(constants.IDISK_SIZE, None)
9790 if size is None:
9791 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9792 try:
9793 size = int(size)
9794 except (TypeError, ValueError):
9795 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9798 data_vg = disk.get(constants.IDISK_VG, default_vg)
9799 new_disk = {
9800 constants.IDISK_SIZE: size,
9801 constants.IDISK_MODE: mode,
9802 constants.IDISK_VG: data_vg,
9803 }
9804 if constants.IDISK_METAVG in disk:
9805 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9806 if constants.IDISK_ADOPT in disk:
9807 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9808 self.disks.append(new_disk)
9810 if self.op.mode == constants.INSTANCE_IMPORT:
9811 disk_images = []
9812 for idx in range(len(self.disks)):
9813 option = "disk%d_dump" % idx
9814 if export_info.has_option(constants.INISECT_INS, option):
9815 # FIXME: are the old os-es, disk sizes, etc. useful?
9816 export_name = export_info.get(constants.INISECT_INS, option)
9817 image = utils.PathJoin(self.op.src_path, export_name)
9818 disk_images.append(image)
9820 disk_images.append(False)
9822 self.src_images = disk_images
9824 if self.op.instance_name == self._old_instance_name:
9825 for idx, nic in enumerate(self.nics):
9826 if nic.mac == constants.VALUE_AUTO:
9827 nic_mac_ini = "nic%d_mac" % idx
9828 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9830 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9832 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9833 if self.op.ip_check:
9834 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9835 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9836 (self.check_ip, self.op.instance_name),
9837 errors.ECODE_NOTUNIQUE)
9839 #### mac address generation
9840 # By generating here the mac address both the allocator and the hooks get
9841 # the real final mac address rather than the 'auto' or 'generate' value.
9842 # There is a race condition between the generation and the instance object
9843 # creation, which means that we know the mac is valid now, but we're not
9844 # sure it will be when we actually add the instance. If things go bad
9845 # adding the instance will abort because of a duplicate mac, and the
9846 # creation job will fail.
9847 for nic in self.nics:
9848 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9849 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9853 if self.op.iallocator is not None:
9854 self._RunAllocator()
9856 # Release all unneeded node locks
9857 _ReleaseLocks(self, locking.LEVEL_NODE,
9858 keep=filter(None, [self.op.pnode, self.op.snode,
9859 self.op.src_node]))
9860 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9861 keep=filter(None, [self.op.pnode, self.op.snode,
9862 self.op.src_node]))
9864 #### node related checks
9866 # check primary node
9867 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9868 assert self.pnode is not None, \
9869 "Cannot retrieve locked node %s" % self.op.pnode
9870 if pnode.offline:
9871 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9872 pnode.name, errors.ECODE_STATE)
9873 if pnode.drained:
9874 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9875 pnode.name, errors.ECODE_STATE)
9876 if not pnode.vm_capable:
9877 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9878 " '%s'" % pnode.name, errors.ECODE_STATE)
9880 self.secondaries = []
9882 # mirror node verification
9883 if self.op.disk_template in constants.DTS_INT_MIRROR:
9884 if self.op.snode == pnode.name:
9885 raise errors.OpPrereqError("The secondary node cannot be the"
9886 " primary node", errors.ECODE_INVAL)
9887 _CheckNodeOnline(self, self.op.snode)
9888 _CheckNodeNotDrained(self, self.op.snode)
9889 _CheckNodeVmCapable(self, self.op.snode)
9890 self.secondaries.append(self.op.snode)
9892 snode = self.cfg.GetNodeInfo(self.op.snode)
9893 if pnode.group != snode.group:
9894 self.LogWarning("The primary and secondary nodes are in two"
9895 " different node groups; the disk parameters"
9896 " from the first disk's node group will be"
9897 " used")
9899 nodenames = [pnode.name] + self.secondaries
9901 # Verify instance specs
9902 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9903 ispec = {
9904 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9905 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9906 constants.ISPEC_DISK_COUNT: len(self.disks),
9907 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9908 constants.ISPEC_NIC_COUNT: len(self.nics),
9909 constants.ISPEC_SPINDLE_USE: spindle_use,
9910 }
9912 group_info = self.cfg.GetNodeGroup(pnode.group)
9913 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9914 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9915 if not self.op.ignore_ipolicy and res:
9916 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9917 " policy: %s") % (pnode.group,
9918 utils.CommaJoin(res)),
9921 if not self.adopt_disks:
9922 if self.op.disk_template == constants.DT_RBD:
9923 # _CheckRADOSFreeSpace() is just a placeholder.
9924 # Any function that checks prerequisites can be placed here.
9925 # Check if there is enough space on the RADOS cluster.
9926 _CheckRADOSFreeSpace()
9928 # Check lv size requirements, if not adopting
9929 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9930 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9932 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9933 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9934 disk[constants.IDISK_ADOPT])
9935 for disk in self.disks])
9936 if len(all_lvs) != len(self.disks):
9937 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9939 for lv_name in all_lvs:
9940 try:
9941 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9942 # to ReserveLV uses the same syntax
9943 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9944 except errors.ReservationError:
9945 raise errors.OpPrereqError("LV named %s used by another instance" %
9946 lv_name, errors.ECODE_NOTUNIQUE)
9948 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9949 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9951 node_lvs = self.rpc.call_lv_list([pnode.name],
9952 vg_names.payload.keys())[pnode.name]
9953 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9954 node_lvs = node_lvs.payload
9956 delta = all_lvs.difference(node_lvs.keys())
9957 if delta:
9958 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9959 utils.CommaJoin(delta),
9961 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9962 if online_lvs:
9963 raise errors.OpPrereqError("Online logical volumes found, cannot"
9964 " adopt: %s" % utils.CommaJoin(online_lvs),
9966 # update the size of disk based on what is found
9967 for dsk in self.disks:
9968 dsk[constants.IDISK_SIZE] = \
9969 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9970 dsk[constants.IDISK_ADOPT])][0]))
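# The lv_list payload maps "vg/lv" names to per-LV tuples; judging from the
# indexing above, element 0 is the size (in MiB, as used elsewhere in this
# module) and element 2 the online flag, so adopted disks inherit their
# real size from the existing volume.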
9972 elif self.op.disk_template == constants.DT_BLOCK:
9973 # Normalize and de-duplicate device paths
9974 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9975 for disk in self.disks])
9976 if len(all_disks) != len(self.disks):
9977 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9979 baddisks = [d for d in all_disks
9980 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9981 if baddisks:
9982 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9983 " cannot be adopted" %
9984 (", ".join(baddisks),
9985 constants.ADOPTABLE_BLOCKDEV_ROOT),
9988 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9989 list(all_disks))[pnode.name]
9990 node_disks.Raise("Cannot get block device information from node %s" %
9991 pnode.name)
9992 node_disks = node_disks.payload
9993 delta = all_disks.difference(node_disks.keys())
9994 if delta:
9995 raise errors.OpPrereqError("Missing block device(s): %s" %
9996 utils.CommaJoin(delta),
9998 for dsk in self.disks:
9999 dsk[constants.IDISK_SIZE] = \
10000 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10002 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10004 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10005 # check OS parameters (remotely)
10006 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10008 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10010 # memory check on primary node
10011 #TODO(dynmem): use MINMEM for checking
10012 if self.op.start:
10013 _CheckNodeFreeMemory(self, self.pnode.name,
10014 "creating instance %s" % self.op.instance_name,
10015 self.be_full[constants.BE_MAXMEM],
10016 self.op.hypervisor)
10018 self.dry_run_result = list(nodenames)
10020 def Exec(self, feedback_fn):
10021 """Create and add the instance to the cluster.
10024 instance = self.op.instance_name
10025 pnode_name = self.pnode.name
10027 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10028 self.owned_locks(locking.LEVEL_NODE)), \
10029 "Node locks differ from node resource locks"
10031 ht_kind = self.op.hypervisor
10032 if ht_kind in constants.HTS_REQ_PORT:
10033 network_port = self.cfg.AllocatePort()
10035 network_port = None
10037 # This is ugly but we got a chicken-egg problem here
10038 # We can only take the group disk parameters, as the instance
10039 # has no disks yet (we are generating them right here).
10040 node = self.cfg.GetNodeInfo(pnode_name)
10041 nodegroup = self.cfg.GetNodeGroup(node.group)
10042 disks = _GenerateDiskTemplate(self,
10043 self.op.disk_template,
10044 instance, pnode_name,
10047 self.instance_file_storage_dir,
10048 self.op.file_driver,
10051 self.cfg.GetGroupDiskParams(nodegroup))
10053 iobj = objects.Instance(name=instance, os=self.op.os_type,
10054 primary_node=pnode_name,
10055 nics=self.nics, disks=disks,
10056 disk_template=self.op.disk_template,
10057 admin_state=constants.ADMINST_DOWN,
10058 network_port=network_port,
10059 beparams=self.op.beparams,
10060 hvparams=self.op.hvparams,
10061 hypervisor=self.op.hypervisor,
10062 osparams=self.op.osparams,
10066 for tag in self.op.tags:
10067 iobj.AddTag(tag)
10069 if self.adopt_disks:
10070 if self.op.disk_template == constants.DT_PLAIN:
10071 # rename LVs to the newly-generated names; we need to construct
10072 # 'fake' LV disks with the old data, plus the new unique_id
10073 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10074 rename_to = []
10075 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10076 rename_to.append(t_dsk.logical_id)
10077 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10078 self.cfg.SetDiskID(t_dsk, pnode_name)
10079 result = self.rpc.call_blockdev_rename(pnode_name,
10080 zip(tmp_disks, rename_to))
10081 result.Raise("Failed to rename adopted LVs")
10083 feedback_fn("* creating instance disks...")
10084 try:
10085 _CreateDisks(self, iobj)
10086 except errors.OpExecError:
10087 self.LogWarning("Device creation failed, reverting...")
10088 try:
10089 _RemoveDisks(self, iobj)
10090 finally:
10091 self.cfg.ReleaseDRBDMinors(instance)
10092 raise
10094 feedback_fn("adding instance %s to cluster config" % instance)
10096 self.cfg.AddInstance(iobj, self.proc.GetECId())
10098 # Declare that we don't want to remove the instance lock anymore, as we've
10099 # added the instance to the config
10100 del self.remove_locks[locking.LEVEL_INSTANCE]
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 # Release unused nodes
10104 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10106 # Release all nodes
10107 _ReleaseLocks(self, locking.LEVEL_NODE)
10110 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10111 feedback_fn("* wiping instance disks...")
10112 try:
10113 _WipeDisks(self, iobj)
10114 except errors.OpExecError, err:
10115 logging.exception("Wiping disks failed")
10116 self.LogWarning("Wiping instance disks failed (%s)", err)
10120 # Something is already wrong with the disks, don't do anything else
10122 elif self.op.wait_for_sync:
10123 disk_abort = not _WaitForSync(self, iobj)
10124 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10125 # make sure the disks are not degraded (still sync-ing is ok)
10126 feedback_fn("* checking mirrors status")
10127 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10132 _RemoveDisks(self, iobj)
10133 self.cfg.RemoveInstance(iobj.name)
10134 # Make sure the instance lock gets removed
10135 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10136 raise errors.OpExecError("There are some degraded disks for"
10137 " this instance")
10139 # Release all node resource locks
10140 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10142 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10143 if self.op.mode == constants.INSTANCE_CREATE:
10144 if not self.op.no_install:
10145 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10146 not self.op.wait_for_sync)
10147 if pause_sync:
10148 feedback_fn("* pausing disk sync to install instance OS")
10149 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10152 for idx, success in enumerate(result.payload):
10153 if not success:
10154 logging.warn("pause-sync of instance %s for disk %d failed",
10157 feedback_fn("* running the instance OS create scripts...")
10158 # FIXME: pass debug option from opcode to backend
10159 os_add_result = \
10160 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10161 self.op.debug_level)
10162 if pause_sync:
10163 feedback_fn("* resuming disk sync")
10164 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10167 for idx, success in enumerate(result.payload):
10168 if not success:
10169 logging.warn("resume-sync of instance %s for disk %d failed",
10172 os_add_result.Raise("Could not add os for instance %s"
10173 " on node %s" % (instance, pnode_name))
10176 if self.op.mode == constants.INSTANCE_IMPORT:
10177 feedback_fn("* running the instance OS import scripts...")
10179 transfers = []
10181 for idx, image in enumerate(self.src_images):
10185 # FIXME: pass debug option from opcode to backend
10186 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10187 constants.IEIO_FILE, (image, ),
10188 constants.IEIO_SCRIPT,
10189 (iobj.disks[idx], idx),
10191 transfers.append(dt)
10193 import_result = \
10194 masterd.instance.TransferInstanceData(self, feedback_fn,
10195 self.op.src_node, pnode_name,
10196 self.pnode.secondary_ip,
10197 iobj, transfers)
10198 if not compat.all(import_result):
10199 self.LogWarning("Some disks for instance %s on node %s were not"
10200 " imported successfully" % (instance, pnode_name))
10202 rename_from = self._old_instance_name
10204 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10205 feedback_fn("* preparing remote import...")
10206 # The source cluster will stop the instance before attempting to make
10207 # a connection. In some cases stopping an instance can take a long
10208 # time, hence the shutdown timeout is added to the connection
10209 # timeout
10210 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10211 self.op.source_shutdown_timeout)
10212 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10214 assert iobj.primary_node == self.pnode.name
10215 disk_results = \
10216 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10217 self.source_x509_ca,
10218 self._cds, timeouts)
10219 if not compat.all(disk_results):
10220 # TODO: Should the instance still be started, even if some disks
10221 # failed to import (valid for local imports, too)?
10222 self.LogWarning("Some disks for instance %s on node %s were not"
10223 " imported successfully" % (instance, pnode_name))
10225 rename_from = self.source_instance_name
10227 else:
10228 # also checked in the prereq part
10229 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10230 % self.op.mode)
10232 # Run rename script on newly imported instance
10233 assert iobj.name == instance
10234 feedback_fn("Running rename script for %s" % instance)
10235 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10236 rename_from,
10237 self.op.debug_level)
10238 if result.fail_msg:
10239 self.LogWarning("Failed to run rename script for %s on node"
10240 " %s: %s" % (instance, pnode_name, result.fail_msg))
10242 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10244 if self.op.start:
10245 iobj.admin_state = constants.ADMINST_UP
10246 self.cfg.Update(iobj, feedback_fn)
10247 logging.info("Starting instance %s on node %s", instance, pnode_name)
10248 feedback_fn("* starting instance...")
10249 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10251 result.Raise("Could not start instance")
10253 return list(iobj.all_nodes)
10256 def _CheckRADOSFreeSpace():
10257 """Compute disk size requirements inside the RADOS cluster.
10260 # For the RADOS cluster we assume there is always enough space.
10261 pass
10264 class LUInstanceConsole(NoHooksLU):
10265 """Connect to an instance's console.
10267 This is somewhat special in that it returns the command line that
10268 you need to run on the master node in order to connect to the
10269 console.
10274 def ExpandNames(self):
10275 self.share_locks = _ShareAll()
10276 self._ExpandAndLockInstance()
10278 def CheckPrereq(self):
10279 """Check prerequisites.
10281 This checks that the instance is in the cluster.
10284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10285 assert self.instance is not None, \
10286 "Cannot retrieve locked instance %s" % self.op.instance_name
10287 _CheckNodeOnline(self, self.instance.primary_node)
10289 def Exec(self, feedback_fn):
10290 """Connect to the console of an instance
10293 instance = self.instance
10294 node = instance.primary_node
10296 node_insts = self.rpc.call_instance_list([node],
10297 [instance.hypervisor])[node]
10298 node_insts.Raise("Can't get node information from %s" % node)
10300 if instance.name not in node_insts.payload:
10301 if instance.admin_state == constants.ADMINST_UP:
10302 state = constants.INSTST_ERRORDOWN
10303 elif instance.admin_state == constants.ADMINST_DOWN:
10304 state = constants.INSTST_ADMINDOWN
10306 state = constants.INSTST_ADMINOFFLINE
10307 raise errors.OpExecError("Instance %s is not running (state %s)" %
10308 (instance.name, state))
10310 logging.debug("Connecting to console of %s on %s", instance.name, node)
10312 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10315 def _GetInstanceConsole(cluster, instance):
10316 """Returns console information for an instance.
10318 @type cluster: L{objects.Cluster}
10319 @type instance: L{objects.Instance}
10323 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10324 # beparams and hvparams are passed separately, to avoid editing the
10325 # instance and then saving the defaults in the instance itself.
10326 hvparams = cluster.FillHV(instance)
10327 beparams = cluster.FillBE(instance)
10328 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10330 assert console.instance == instance.name
10331 assert console.Validate()
10333 return console.ToDict()
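# The returned dictionary is the serialized console object built by the
# hypervisor; callers (e.g. the command-line client) use it to construct
# the actual console command, this LU never opens the console itself.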
10336 class LUInstanceReplaceDisks(LogicalUnit):
10337 """Replace the disks of an instance.
10340 HPATH = "mirrors-replace"
10341 HTYPE = constants.HTYPE_INSTANCE
10342 REQ_BGL = False
10344 def CheckArguments(self):
10345 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10346 self.op.iallocator)
10348 def ExpandNames(self):
10349 self._ExpandAndLockInstance()
10351 assert locking.LEVEL_NODE not in self.needed_locks
10352 assert locking.LEVEL_NODE_RES not in self.needed_locks
10353 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10355 assert self.op.iallocator is None or self.op.remote_node is None, \
10356 "Conflicting options"
10358 if self.op.remote_node is not None:
10359 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10361 # Warning: do not remove the locking of the new secondary here
10362 # unless DRBD8.AddChildren is changed to work in parallel;
10363 # currently it doesn't since parallel invocations of
10364 # FindUnusedMinor will conflict
10365 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10366 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10367 else:
10368 self.needed_locks[locking.LEVEL_NODE] = []
10369 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10371 if self.op.iallocator is not None:
10372 # iallocator will select a new node in the same group
10373 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10375 self.needed_locks[locking.LEVEL_NODE_RES] = []
10377 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10378 self.op.iallocator, self.op.remote_node,
10379 self.op.disks, False, self.op.early_release,
10380 self.op.ignore_ipolicy)
10382 self.tasklets = [self.replacer]
10384 def DeclareLocks(self, level):
10385 if level == locking.LEVEL_NODEGROUP:
10386 assert self.op.remote_node is None
10387 assert self.op.iallocator is not None
10388 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10390 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10391 # Lock all groups used by instance optimistically; this requires going
10392 # via the node before it's locked, requiring verification later on
10393 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10394 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10396 elif level == locking.LEVEL_NODE:
10397 if self.op.iallocator is not None:
10398 assert self.op.remote_node is None
10399 assert not self.needed_locks[locking.LEVEL_NODE]
10401 # Lock member nodes of all locked groups
10402 self.needed_locks[locking.LEVEL_NODE] = [node_name
10403 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10404 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10406 self._LockInstancesNodes()
10407 elif level == locking.LEVEL_NODE_RES:
10409 self.needed_locks[locking.LEVEL_NODE_RES] = \
10410 self.needed_locks[locking.LEVEL_NODE]
10412 def BuildHooksEnv(self):
10413 """Build hooks env.
10415 This runs on the master, the primary and all the secondaries.
10418 instance = self.replacer.instance
10419 env = {
10420 "MODE": self.op.mode,
10421 "NEW_SECONDARY": self.op.remote_node,
10422 "OLD_SECONDARY": instance.secondary_nodes[0],
10423 }
10424 env.update(_BuildInstanceHookEnvByObject(self, instance))
10425 return env
10427 def BuildHooksNodes(self):
10428 """Build hooks nodes.
10431 instance = self.replacer.instance
10432 nl = [
10433 self.cfg.GetMasterNode(),
10434 instance.primary_node,
10435 ]
10436 if self.op.remote_node is not None:
10437 nl.append(self.op.remote_node)
10439 return nl, nl
10440 def CheckPrereq(self):
10441 """Check prerequisites.
10444 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10445 self.op.iallocator is None)
10447 # Verify if node group locks are still correct
10448 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10449 if owned_groups:
10450 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10452 return LogicalUnit.CheckPrereq(self)
10455 class TLReplaceDisks(Tasklet):
10456 """Replaces disks for an instance.
10458 Note: Locking is not within the scope of this class.
10461 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10462 disks, delay_iallocator, early_release, ignore_ipolicy):
10463 """Initializes this class.
10466 Tasklet.__init__(self, lu)
10469 self.instance_name = instance_name
10470 self.mode = mode
10471 self.iallocator_name = iallocator_name
10472 self.remote_node = remote_node
10473 self.disks = disks
10474 self.delay_iallocator = delay_iallocator
10475 self.early_release = early_release
10476 self.ignore_ipolicy = ignore_ipolicy
10479 self.instance = None
10480 self.new_node = None
10481 self.target_node = None
10482 self.other_node = None
10483 self.remote_node_info = None
10484 self.node_secondary_ip = None
10487 def CheckArguments(mode, remote_node, iallocator):
10488 """Helper function for users of this class.
10491 # check for valid parameter combination
10492 if mode == constants.REPLACE_DISK_CHG:
10493 if remote_node is None and iallocator is None:
10494 raise errors.OpPrereqError("When changing the secondary either an"
10495 " iallocator script must be used or the"
10496 " new node given", errors.ECODE_INVAL)
10498 if remote_node is not None and iallocator is not None:
10499 raise errors.OpPrereqError("Give either the iallocator or the new"
10500 " secondary, not both", errors.ECODE_INVAL)
10502 elif remote_node is not None or iallocator is not None:
10503 # Not replacing the secondary
10504 raise errors.OpPrereqError("The iallocator and new node options can"
10505 " only be used when changing the"
10506 " secondary node", errors.ECODE_INVAL)
10509 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10510 """Compute a new secondary node using an IAllocator.
10513 ial = IAllocator(lu.cfg, lu.rpc,
10514 mode=constants.IALLOCATOR_MODE_RELOC,
10515 name=instance_name,
10516 relocate_from=list(relocate_from))
10518 ial.Run(iallocator_name)
10520 if not ial.success:
10521 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10522 " %s" % (iallocator_name, ial.info),
10523 errors.ECODE_NORES)
10525 if len(ial.result) != ial.required_nodes:
10526 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10527 " of nodes (%s), required %s" %
10528 (iallocator_name,
10529 len(ial.result), ial.required_nodes),
10530 errors.ECODE_FAULT)
10532 remote_node_name = ial.result[0]
10534 lu.LogInfo("Selected new secondary for instance '%s': %s",
10535 instance_name, remote_node_name)
10537 return remote_node_name
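# In IALLOCATOR_MODE_RELOC the allocator is asked to move the instance away
# from its current secondary, so the node name returned here is used as the
# new secondary for the replace-disks operation.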
10539 def _FindFaultyDisks(self, node_name):
10540 """Wrapper for L{_FindFaultyInstanceDisks}.
10543 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10546 def _CheckDisksActivated(self, instance):
10547 """Checks if the instance disks are activated.
10549 @param instance: The instance to check disks
10550 @return: True if they are activated, False otherwise
10553 nodes = instance.all_nodes
10555 for idx, dev in enumerate(instance.disks):
10556 for node in nodes:
10557 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10558 self.cfg.SetDiskID(dev, node)
10560 result = _BlockdevFind(self, node, dev, instance)
10564 elif result.fail_msg or not result.payload:
10569 def CheckPrereq(self):
10570 """Check prerequisites.
10572 This checks that the instance is in the cluster.
10575 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10576 assert instance is not None, \
10577 "Cannot retrieve locked instance %s" % self.instance_name
10579 if instance.disk_template != constants.DT_DRBD8:
10580 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10581 " instances", errors.ECODE_INVAL)
10583 if len(instance.secondary_nodes) != 1:
10584 raise errors.OpPrereqError("The instance has a strange layout,"
10585 " expected one secondary but found %d" %
10586 len(instance.secondary_nodes),
10587 errors.ECODE_FAULT)
10589 if not self.delay_iallocator:
10590 self._CheckPrereq2()
10592 def _CheckPrereq2(self):
10593 """Check prerequisites, second part.
10595 This function should always be part of CheckPrereq. It was separated and is
10596 now called from Exec because during node evacuation iallocator was only
10597 called with an unmodified cluster model, not taking planned changes into
10601 instance = self.instance
10602 secondary_node = instance.secondary_nodes[0]
10604 if self.iallocator_name is None:
10605 remote_node = self.remote_node
10607 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10608 instance.name, instance.secondary_nodes)
10610 if remote_node is None:
10611 self.remote_node_info = None
10613 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10614 "Remote node '%s' is not locked" % remote_node
10616 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10617 assert self.remote_node_info is not None, \
10618 "Cannot retrieve locked node %s" % remote_node
10620 if remote_node == self.instance.primary_node:
10621 raise errors.OpPrereqError("The specified node is the primary node of"
10622 " the instance", errors.ECODE_INVAL)
10624 if remote_node == secondary_node:
10625 raise errors.OpPrereqError("The specified node is already the"
10626 " secondary node of the instance",
10627 errors.ECODE_INVAL)
10629 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10630 constants.REPLACE_DISK_CHG):
10631 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10632 errors.ECODE_INVAL)
10634 if self.mode == constants.REPLACE_DISK_AUTO:
10635 if not self._CheckDisksActivated(instance):
10636 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10637 " first" % self.instance_name,
10638 errors.ECODE_STATE)
10639 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10640 faulty_secondary = self._FindFaultyDisks(secondary_node)
10642 if faulty_primary and faulty_secondary:
10643 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10644 " one node and can not be repaired"
10645 " automatically" % self.instance_name,
10646 errors.ECODE_STATE)
10648 if faulty_primary:
10649 self.disks = faulty_primary
10650 self.target_node = instance.primary_node
10651 self.other_node = secondary_node
10652 check_nodes = [self.target_node, self.other_node]
10653 elif faulty_secondary:
10654 self.disks = faulty_secondary
10655 self.target_node = secondary_node
10656 self.other_node = instance.primary_node
10657 check_nodes = [self.target_node, self.other_node]
10663 # Non-automatic modes
10664 if self.mode == constants.REPLACE_DISK_PRI:
10665 self.target_node = instance.primary_node
10666 self.other_node = secondary_node
10667 check_nodes = [self.target_node, self.other_node]
10669 elif self.mode == constants.REPLACE_DISK_SEC:
10670 self.target_node = secondary_node
10671 self.other_node = instance.primary_node
10672 check_nodes = [self.target_node, self.other_node]
10674 elif self.mode == constants.REPLACE_DISK_CHG:
10675 self.new_node = remote_node
10676 self.other_node = instance.primary_node
10677 self.target_node = secondary_node
10678 check_nodes = [self.new_node, self.other_node]
10680 _CheckNodeNotDrained(self.lu, remote_node)
10681 _CheckNodeVmCapable(self.lu, remote_node)
10683 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10684 assert old_node_info is not None
10685 if old_node_info.offline and not self.early_release:
10686 # doesn't make sense to delay the release
10687 self.early_release = True
10688 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10689 " early-release mode", secondary_node)
10691 else:
10692 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10693 self.mode)
10695 # If not specified all disks should be replaced
10696 if not self.disks:
10697 self.disks = range(len(self.instance.disks))
10699 # TODO: This is ugly, but right now we can't distinguish between internal
10700 # submitted opcode and external one. We should fix that.
10701 if self.remote_node_info:
10702 # We change the node, lets verify it still meets instance policy
10703 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10704 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10705 new_group_info)
10706 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10707 ignore=self.ignore_ipolicy)
10709 for node in check_nodes:
10710 _CheckNodeOnline(self.lu, node)
10712 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10713 self.other_node,
10714 self.target_node]
10715 if node_name is not None)
10717 # Release unneeded node and node resource locks
10718 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10719 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10721 # Release any owned node group
10722 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10723 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10725 # Check whether disks are valid
10726 for disk_idx in self.disks:
10727 instance.FindDisk(disk_idx)
10729 # Get secondary node IP addresses
10730 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10731 in self.cfg.GetMultiNodeInfo(touched_nodes))
10733 def Exec(self, feedback_fn):
10734 """Execute disk replacement.
10736 This dispatches the disk replacement to the appropriate handler.
10739 if self.delay_iallocator:
10740 self._CheckPrereq2()
10743 # Verify owned locks before starting operation
10744 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10745 assert set(owned_nodes) == set(self.node_secondary_ip), \
10746 ("Incorrect node locks, owning %s, expected %s" %
10747 (owned_nodes, self.node_secondary_ip.keys()))
10748 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10749 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10751 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10752 assert list(owned_instances) == [self.instance_name], \
10753 "Instance '%s' not locked" % self.instance_name
10755 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10756 "Should not own any node group lock at this point"
10758 if not self.disks:
10759 feedback_fn("No disks need replacement")
10760 return
10762 feedback_fn("Replacing disk(s) %s for %s" %
10763 (utils.CommaJoin(self.disks), self.instance.name))
10765 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10767 # Activate the instance disks if we're replacing them on a down instance
10768 if activate_disks:
10769 _StartInstanceDisks(self.lu, self.instance, True)
10771 try:
10772 # Should we replace the secondary node?
10773 if self.new_node is not None:
10774 fn = self._ExecDrbd8Secondary
10775 else:
10776 fn = self._ExecDrbd8DiskOnly
10778 result = fn(feedback_fn)
10779 finally:
10780 # Deactivate the instance disks if we're replacing them on a
10781 # down instance
10782 if activate_disks:
10783 _SafeShutdownInstanceDisks(self.lu, self.instance)
10785 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10788 # Verify owned locks
10789 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10790 nodes = frozenset(self.node_secondary_ip)
10791 assert ((self.early_release and not owned_nodes) or
10792 (not self.early_release and not (set(owned_nodes) - nodes))), \
10793 ("Not owning the correct locks, early_release=%s, owned=%r,"
10794 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10796 return result
10798 def _CheckVolumeGroup(self, nodes):
10799 self.lu.LogInfo("Checking volume groups")
10801 vgname = self.cfg.GetVGName()
10803 # Make sure volume group exists on all involved nodes
10804 results = self.rpc.call_vg_list(nodes)
10805 if not results:
10806 raise errors.OpExecError("Can't list volume groups on the nodes")
10808 for node in nodes:
10809 res = results[node]
10810 res.Raise("Error checking node %s" % node)
10811 if vgname not in res.payload:
10812 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10813 (vgname, node))
10815 def _CheckDisksExistence(self, nodes):
10816 # Check disk existence
10817 for idx, dev in enumerate(self.instance.disks):
10818 if idx not in self.disks:
10819 continue
10821 for node in nodes:
10822 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10823 self.cfg.SetDiskID(dev, node)
10825 result = _BlockdevFind(self, node, dev, self.instance)
10827 msg = result.fail_msg
10828 if msg or not result.payload:
10829 if not msg:
10830 msg = "disk not found"
10831 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10832 (idx, node, msg))
10834 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10835 for idx, dev in enumerate(self.instance.disks):
10836 if idx not in self.disks:
10839 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10842 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10843 on_primary, ldisk=ldisk):
10844 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10845 " replace disks for instance %s" %
10846 (node_name, self.instance.name))
10848 def _CreateNewStorage(self, node_name):
10849 """Create new storage on the primary or secondary node.
10851 This is only used for same-node replaces, not for changing the
10852 secondary node, hence we don't want to modify the existing disk.
10856 iv_names = {}
10857 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10858 for idx, dev in enumerate(disks):
10859 if idx not in self.disks:
10860 continue
10862 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10864 self.cfg.SetDiskID(dev, node_name)
10866 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10867 names = _GenerateUniqueNames(self.lu, lv_names)
10869 (data_disk, meta_disk) = dev.children
10870 vg_data = data_disk.logical_id[0]
10871 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10872 logical_id=(vg_data, names[0]),
10873 params=data_disk.params)
10874 vg_meta = meta_disk.logical_id[0]
10875 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10876 logical_id=(vg_meta, names[1]),
10877 params=meta_disk.params)
10879 new_lvs = [lv_data, lv_meta]
10880 old_lvs = [child.Copy() for child in dev.children]
10881 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10883 # we pass force_create=True to force the LVM creation
10884 for new_lv in new_lvs:
10885 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10886 _GetInstanceInfoText(self.instance), False)
10888 return iv_names
10890 def _CheckDevices(self, node_name, iv_names):
10891 for name, (dev, _, _) in iv_names.iteritems():
10892 self.cfg.SetDiskID(dev, node_name)
10894 result = _BlockdevFind(self, node_name, dev, self.instance)
10896 msg = result.fail_msg
10897 if msg or not result.payload:
10898 if not msg:
10899 msg = "disk not found"
10900 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10901 (name, msg))
10903 if result.payload.is_degraded:
10904 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10906 def _RemoveOldStorage(self, node_name, iv_names):
10907 for name, (_, old_lvs, _) in iv_names.iteritems():
10908 self.lu.LogInfo("Remove logical volumes for %s" % name)
10910 for lv in old_lvs:
10911 self.cfg.SetDiskID(lv, node_name)
10913 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10914 if msg:
10915 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10916 hint="remove unused LVs manually")
10918 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10919 """Replace a disk on the primary or secondary for DRBD 8.
10921 The algorithm for replace is quite complicated:
10923 1. for each disk to be replaced:
10925 1. create new LVs on the target node with unique names
10926 1. detach old LVs from the drbd device
10927 1. rename old LVs to name_replaced.<time_t>
10928 1. rename new LVs to old LVs
10929 1. attach the new LVs (with the old names now) to the drbd device
10931 1. wait for sync across all devices
10933 1. for each modified disk:
10935 1. remove old LVs (which have the name name_replaced.<time_t>)
10937 Failures are not very well handled.
10940 steps_total = 6
10942 # Step: check device activation
10943 self.lu.LogStep(1, steps_total, "Check device existence")
10944 self._CheckDisksExistence([self.other_node, self.target_node])
10945 self._CheckVolumeGroup([self.target_node, self.other_node])
10947 # Step: check other node consistency
10948 self.lu.LogStep(2, steps_total, "Check peer consistency")
10949 self._CheckDisksConsistency(self.other_node,
10950 self.other_node == self.instance.primary_node,
10953 # Step: create new storage
10954 self.lu.LogStep(3, steps_total, "Allocate new storage")
10955 iv_names = self._CreateNewStorage(self.target_node)
10957 # Step: for each lv, detach+rename*2+attach
10958 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10959 for dev, old_lvs, new_lvs in iv_names.itervalues():
10960 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10962 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10963 old_lvs)
10964 result.Raise("Can't detach drbd from local storage on node"
10965 " %s for device %s" % (self.target_node, dev.iv_name))
10967 #cfg.Update(instance)
10969 # ok, we created the new LVs, so now we know we have the needed
10970 # storage; as such, we proceed on the target node to rename
10971 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10972 # using the assumption that logical_id == physical_id (which in
10973 # turn is the unique_id on that node)
10975 # FIXME(iustin): use a better name for the replaced LVs
10976 temp_suffix = int(time.time())
10977 ren_fn = lambda d, suff: (d.physical_id[0],
10978 d.physical_id[1] + "_replaced-%s" % suff)
10980 # Build the rename list based on what LVs exist on the node
10981 rename_old_to_new = []
10982 for to_ren in old_lvs:
10983 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10984 if not result.fail_msg and result.payload:
10986 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10988 self.lu.LogInfo("Renaming the old LVs on the target node")
10989 result = self.rpc.call_blockdev_rename(self.target_node,
10990 rename_old_to_new)
10991 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10993 # Now we rename the new LVs to the old LVs
10994 self.lu.LogInfo("Renaming the new LVs on the target node")
10995 rename_new_to_old = [(new, old.physical_id)
10996 for old, new in zip(old_lvs, new_lvs)]
10997 result = self.rpc.call_blockdev_rename(self.target_node,
10998 rename_new_to_old)
10999 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11001 # Intermediate steps of in memory modifications
11002 for old, new in zip(old_lvs, new_lvs):
11003 new.logical_id = old.logical_id
11004 self.cfg.SetDiskID(new, self.target_node)
11006 # We need to modify old_lvs so that removal later removes the
11007 # right LVs, not the newly added ones; note that old_lvs is a
11008 # copy here
11009 for disk in old_lvs:
11010 disk.logical_id = ren_fn(disk, temp_suffix)
11011 self.cfg.SetDiskID(disk, self.target_node)
11013 # Now that the new lvs have the old name, we can add them to the device
11014 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11015 result = self.rpc.call_blockdev_addchildren(self.target_node,
11016 (dev, self.instance), new_lvs)
11017 msg = result.fail_msg
11018 if msg:
11019 for new_lv in new_lvs:
11020 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11021 new_lv).fail_msg
11022 if msg2:
11023 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11024 hint=("cleanup manually the unused logical"
11026 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11028 cstep = itertools.count(5)
11030 if self.early_release:
11031 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11032 self._RemoveOldStorage(self.target_node, iv_names)
11033 # TODO: Check if releasing locks early still makes sense
11034 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11036 # Release all resource locks except those used by the instance
11037 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11038 keep=self.node_secondary_ip.keys())
11040 # Release all node locks while waiting for sync
11041 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11043 # TODO: Can the instance lock be downgraded here? Take the optional disk
11044 # shutdown in the caller into consideration.
11047 # This can fail as the old devices are degraded and _WaitForSync
11048 # does a combined result over all disks, so we don't check its return value
11049 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11050 _WaitForSync(self.lu, self.instance)
11052 # Check all devices manually
11053 self._CheckDevices(self.instance.primary_node, iv_names)
11055 # Step: remove old storage
11056 if not self.early_release:
11057 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11058 self._RemoveOldStorage(self.target_node, iv_names)
11060 def _ExecDrbd8Secondary(self, feedback_fn):
11061 """Replace the secondary node for DRBD 8.
11063 The algorithm for replace is quite complicated:
11064 - for all disks of the instance:
11065 - create new LVs on the new node with same names
11066 - shutdown the drbd device on the old secondary
11067 - disconnect the drbd network on the primary
11068 - create the drbd device on the new secondary
11069 - network attach the drbd on the primary, using an artifice:
11070 the drbd code for Attach() will connect to the network if it
11071 finds a device which is connected to the good local disks but
11072 not network enabled
11073 - wait for sync across all devices
11074 - remove all disks from the old secondary
11076 Failures are not very well handled.
11078 """
11079 steps_total = 6
11081 pnode = self.instance.primary_node
11083 # Step: check device activation
11084 self.lu.LogStep(1, steps_total, "Check device existence")
11085 self._CheckDisksExistence([self.instance.primary_node])
11086 self._CheckVolumeGroup([self.instance.primary_node])
11088 # Step: check other node consistency
11089 self.lu.LogStep(2, steps_total, "Check peer consistency")
11090 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11092 # Step: create new storage
11093 self.lu.LogStep(3, steps_total, "Allocate new storage")
11094 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11095 for idx, dev in enumerate(disks):
11096 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11097 (self.new_node, idx))
11098 # we pass force_create=True to force LVM creation
11099 for new_lv in dev.children:
11100 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11101 True, _GetInstanceInfoText(self.instance), False)
11103 # Step 4: drbd minors and drbd setup changes
11104 # after this, we must manually remove the drbd minors on both the
11105 # error and the success paths
11106 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11107 minors = self.cfg.AllocateDRBDMinor([self.new_node
11108 for dev in self.instance.disks],
11109 self.instance.name)
11110 logging.debug("Allocated minors %r", minors)
11113 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11114 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11115 (self.new_node, idx))
11116 # create new devices on new_node; note that we create two IDs:
11117 # one without port, so the drbd will be activated without
11118 # networking information on the new node at this stage, and one
11119 # with network, for the latter activation in step 4
11120 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11121 if self.instance.primary_node == o_node1:
11122 p_minor = o_minor1
11123 else:
11124 assert self.instance.primary_node == o_node2, "Three-node instance?"
11125 p_minor = o_minor2
11127 new_alone_id = (self.instance.primary_node, self.new_node, None,
11128 p_minor, new_minor, o_secret)
11129 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11130 p_minor, new_minor, o_secret)
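# Illustrative sketch of the two IDs built above (node names and numbers
# assumed): if dev.logical_id is ("nodeA", "nodeB", 11000, 0, 3, "secret"),
# nodeA is the primary and the newly allocated minor is 5, then
#   new_alone_id = ("nodeA", <new_node>, None, 0, 5, "secret")
#   new_net_id   = ("nodeA", <new_node>, 11000, 0, 5, "secret")
# so the drbd can first be created without networking and attached to the
# network only later.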
11132 iv_names[idx] = (dev, dev.children, new_net_id)
11133 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11134 new_net_id)
11135 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11136 logical_id=new_alone_id,
11137 children=dev.children,
11138 size=dev.size,
11139 params={})
11140 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11141 self.cfg)
11142 try:
11143 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11144 anno_new_drbd,
11145 _GetInstanceInfoText(self.instance), False)
11146 except errors.GenericError:
11147 self.cfg.ReleaseDRBDMinors(self.instance.name)
11148 raise
11150 # We have new devices, shutdown the drbd on the old secondary
11151 for idx, dev in enumerate(self.instance.disks):
11152 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11153 self.cfg.SetDiskID(dev, self.target_node)
11154 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11155 if msg:
11156 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11157 " node: %s" % (idx, msg),
11158 hint=("Please cleanup this device manually as"
11159 " soon as possible"))
11161 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11162 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11163 self.instance.disks)[pnode]
11165 msg = result.fail_msg
11166 if msg:
11167 # detaches didn't succeed (unlikely)
11168 self.cfg.ReleaseDRBDMinors(self.instance.name)
11169 raise errors.OpExecError("Can't detach the disks from the network on"
11170 " old node: %s" % (msg,))
11172 # if we managed to detach at least one, we update all the disks of
11173 # the instance to point to the new secondary
11174 self.lu.LogInfo("Updating instance configuration")
11175 for dev, _, new_logical_id in iv_names.itervalues():
11176 dev.logical_id = new_logical_id
11177 self.cfg.SetDiskID(dev, self.instance.primary_node)
11179 self.cfg.Update(self.instance, feedback_fn)
11181 # Release all node locks (the configuration has been updated)
11182 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11184 # and now perform the drbd attach
11185 self.lu.LogInfo("Attaching primary drbds to new secondary"
11186 " (standalone => connected)")
11187 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11188 self.new_node],
11189 self.node_secondary_ip,
11190 (self.instance.disks, self.instance),
11191 self.instance.name,
11192 False)
11193 for to_node, to_result in result.items():
11194 msg = to_result.fail_msg
11195 if msg:
11196 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11197 to_node, msg,
11198 hint=("please do a gnt-instance info to see the"
11199 " status of disks"))
11201 cstep = itertools.count(5)
11203 if self.early_release:
11204 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11205 self._RemoveOldStorage(self.target_node, iv_names)
11206 # TODO: Check if releasing locks early still makes sense
11207 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11209 # Release all resource locks except those used by the instance
11210 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11211 keep=self.node_secondary_ip.keys())
11213 # TODO: Can the instance lock be downgraded here? Take the optional disk
11214 # shutdown in the caller into consideration.
11217 # This can fail as the old devices are degraded and _WaitForSync
11218 # does a combined result over all disks, so we don't check its return value
11219 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11220 _WaitForSync(self.lu, self.instance)
11222 # Check all devices manually
11223 self._CheckDevices(self.instance.primary_node, iv_names)
11225 # Step: remove old storage
11226 if not self.early_release:
11227 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11228 self._RemoveOldStorage(self.target_node, iv_names)
11231 class LURepairNodeStorage(NoHooksLU):
11232 """Repairs the volume group on a node.
11237 def CheckArguments(self):
11238 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11240 storage_type = self.op.storage_type
11242 if (constants.SO_FIX_CONSISTENCY not in
11243 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11244 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11245 " repaired" % storage_type,
11246 errors.ECODE_INVAL)
11248 def ExpandNames(self):
11249 self.needed_locks = {
11250 locking.LEVEL_NODE: [self.op.node_name],
11251 }
11253 def _CheckFaultyDisks(self, instance, node_name):
11254 """Ensure faulty disks abort the opcode or at least warn."""
11256 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11258 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11259 " node '%s'" % (instance.name, node_name),
11260 errors.ECODE_STATE)
11261 except errors.OpPrereqError, err:
11262 if self.op.ignore_consistency:
11263 self.proc.LogWarning(str(err.args[0]))
11267 def CheckPrereq(self):
11268 """Check prerequisites.
11271 # Check whether any instance on this node has faulty disks
11272 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11273 if inst.admin_state != constants.ADMINST_UP:
11274 continue
11275 check_nodes = set(inst.all_nodes)
11276 check_nodes.discard(self.op.node_name)
11277 for inst_node_name in check_nodes:
11278 self._CheckFaultyDisks(inst, inst_node_name)
11280 def Exec(self, feedback_fn):
11281 feedback_fn("Repairing storage unit '%s' on %s ..." %
11282 (self.op.name, self.op.node_name))
11284 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11285 result = self.rpc.call_storage_execute(self.op.node_name,
11286 self.op.storage_type, st_args,
11287 self.op.name,
11288 constants.SO_FIX_CONSISTENCY)
11289 result.Raise("Failed to repair storage unit '%s' on %s" %
11290 (self.op.name, self.op.node_name))
11293 class LUNodeEvacuate(NoHooksLU):
11294 """Evacuates instances off a list of nodes.
11299 _MODE2IALLOCATOR = {
11300 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11301 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11302 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11304 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11305 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11306 constants.IALLOCATOR_NEVAC_MODES)
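# How the mapping is used (a sketch, based on Exec below): an opcode with
# mode=constants.NODE_EVAC_SEC is translated into an iallocator request with
# evac_mode=constants.IALLOCATOR_NEVAC_SEC; the two asserts above make sure
# the translation covers every evacuation mode in both directions.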
11308 def CheckArguments(self):
11309 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11311 def ExpandNames(self):
11312 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11314 if self.op.remote_node is not None:
11315 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11316 assert self.op.remote_node
11318 if self.op.remote_node == self.op.node_name:
11319 raise errors.OpPrereqError("Can not use evacuated node as a new"
11320 " secondary node", errors.ECODE_INVAL)
11322 if self.op.mode != constants.NODE_EVAC_SEC:
11323 raise errors.OpPrereqError("Without the use of an iallocator only"
11324 " secondary instances can be evacuated",
11325 errors.ECODE_INVAL)
11328 self.share_locks = _ShareAll()
11329 self.needed_locks = {
11330 locking.LEVEL_INSTANCE: [],
11331 locking.LEVEL_NODEGROUP: [],
11332 locking.LEVEL_NODE: [],
11333 }
11335 # Determine nodes (via group) optimistically, needs verification once locks
11336 # have been acquired
11337 self.lock_nodes = self._DetermineNodes()
11339 def _DetermineNodes(self):
11340 """Gets the list of nodes to operate on.
11343 if self.op.remote_node is None:
11344 # Iallocator will choose any node(s) in the same group
11345 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11346 else:
11347 group_nodes = frozenset([self.op.remote_node])
11349 # Determine nodes to be locked
11350 return set([self.op.node_name]) | group_nodes
11352 def _DetermineInstances(self):
11353 """Builds list of instances to operate on.
11356 assert self.op.mode in constants.NODE_EVAC_MODES
11358 if self.op.mode == constants.NODE_EVAC_PRI:
11359 # Primary instances only
11360 inst_fn = _GetNodePrimaryInstances
11361 assert self.op.remote_node is None, \
11362 "Evacuating primary instances requires iallocator"
11363 elif self.op.mode == constants.NODE_EVAC_SEC:
11364 # Secondary instances only
11365 inst_fn = _GetNodeSecondaryInstances
11366 else:
11367 # All instances
11368 assert self.op.mode == constants.NODE_EVAC_ALL
11369 inst_fn = _GetNodeInstances
11370 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11372 raise errors.OpPrereqError("Due to an issue with the iallocator"
11373 " interface it is not possible to evacuate"
11374 " all instances at once; specify explicitly"
11375 " whether to evacuate primary or secondary"
11377 errors.ECODE_INVAL)
11379 return inst_fn(self.cfg, self.op.node_name)
11381 def DeclareLocks(self, level):
11382 if level == locking.LEVEL_INSTANCE:
11383 # Lock instances optimistically, needs verification once node and group
11384 # locks have been acquired
11385 self.needed_locks[locking.LEVEL_INSTANCE] = \
11386 set(i.name for i in self._DetermineInstances())
11388 elif level == locking.LEVEL_NODEGROUP:
11389 # Lock node groups for all potential target nodes optimistically, needs
11390 # verification once nodes have been acquired
11391 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11392 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11394 elif level == locking.LEVEL_NODE:
11395 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11397 def CheckPrereq(self):
11399 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11400 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11401 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11403 need_nodes = self._DetermineNodes()
11405 if not owned_nodes.issuperset(need_nodes):
11406 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11407 " locks were acquired, current nodes are"
11408 " are '%s', used to be '%s'; retry the"
11410 (self.op.node_name,
11411 utils.CommaJoin(need_nodes),
11412 utils.CommaJoin(owned_nodes)),
11413 errors.ECODE_STATE)
11415 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11416 if owned_groups != wanted_groups:
11417 raise errors.OpExecError("Node groups changed since locks were acquired,"
11418 " current groups are '%s', used to be '%s';"
11419 " retry the operation" %
11420 (utils.CommaJoin(wanted_groups),
11421 utils.CommaJoin(owned_groups)))
11423 # Determine affected instances
11424 self.instances = self._DetermineInstances()
11425 self.instance_names = [i.name for i in self.instances]
11427 if set(self.instance_names) != owned_instances:
11428 raise errors.OpExecError("Instances on node '%s' changed since locks"
11429 " were acquired, current instances are '%s',"
11430 " used to be '%s'; retry the operation" %
11431 (self.op.node_name,
11432 utils.CommaJoin(self.instance_names),
11433 utils.CommaJoin(owned_instances)))
11435 if self.instance_names:
11436 self.LogInfo("Evacuating instances from node '%s': %s",
11437 self.op.node_name,
11438 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11439 else:
11440 self.LogInfo("No instances to evacuate from node '%s'",
11441 self.op.node_name)
11443 if self.op.remote_node is not None:
11444 for i in self.instances:
11445 if i.primary_node == self.op.remote_node:
11446 raise errors.OpPrereqError("Node %s is the primary node of"
11447 " instance %s, cannot use it as"
11449 (self.op.remote_node, i.name),
11450 errors.ECODE_INVAL)
11452 def Exec(self, feedback_fn):
11453 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11455 if not self.instance_names:
11456 # No instances to evacuate
11457 jobs = []
11459 elif self.op.iallocator is not None:
11460 # TODO: Implement relocation to other group
11461 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11462 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11463 instances=list(self.instance_names))
11465 ial.Run(self.op.iallocator)
11467 if not ial.success:
11468 raise errors.OpPrereqError("Can't compute node evacuation using"
11469 " iallocator '%s': %s" %
11470 (self.op.iallocator, ial.info),
11471 errors.ECODE_NORES)
11473 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11475 elif self.op.remote_node is not None:
11476 assert self.op.mode == constants.NODE_EVAC_SEC
11477 jobs = [
11478 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11479 remote_node=self.op.remote_node,
11480 disks=[],
11481 mode=constants.REPLACE_DISK_CHG,
11482 early_release=self.op.early_release)]
11483 for instance_name in self.instance_names
11484 ]
11486 else:
11487 raise errors.ProgrammerError("No iallocator or remote node")
11489 return ResultWithJobs(jobs)
11492 def _SetOpEarlyRelease(early_release, op):
11493 """Sets C{early_release} flag on opcodes if available.
11497 op.early_release = early_release
11498 except AttributeError:
11499 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11504 def _NodeEvacDest(use_nodes, group, nodes):
11505 """Returns group or nodes depending on caller's choice.
11509 return utils.CommaJoin(nodes)
11514 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11515 """Unpacks the result of change-group and node-evacuate iallocator requests.
11517 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11518 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11520 @type lu: L{LogicalUnit}
11521 @param lu: Logical unit instance
11522 @type alloc_result: tuple/list
11523 @param alloc_result: Result from iallocator
11524 @type early_release: bool
11525 @param early_release: Whether to release locks early if possible
11526 @type use_nodes: bool
11527 @param use_nodes: Whether to display node names instead of groups
11529 """
11530 (moved, failed, jobs) = alloc_result
11532 if failed:
11533 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11534 for (name, reason) in failed)
11535 lu.LogWarning("Unable to evacuate instances %s", failreason)
11536 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11539 lu.LogInfo("Instances to be moved: %s",
11540 utils.CommaJoin("%s (to %s)" %
11541 (name, _NodeEvacDest(use_nodes, group, nodes))
11542 for (name, group, nodes) in moved))
11544 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11545 map(opcodes.OpCode.LoadOpCode, ops))
11546 for ops in jobs]
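# Illustrative shape of alloc_result as unpacked above (names assumed):
#   moved  = [("inst1.example.com", "group1", ["node3.example.com"])]
#   failed = [("inst2.example.com", "not enough memory")]
#   jobs   = [[<serialized opcode>, ...], ...]
# where each inner list is deserialized with opcodes.OpCode.LoadOpCode and
# submitted as one job.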
11549 class LUInstanceGrowDisk(LogicalUnit):
11550 """Grow a disk of an instance.
11553 HPATH = "disk-grow"
11554 HTYPE = constants.HTYPE_INSTANCE
11555 REQ_BGL = False
11557 def ExpandNames(self):
11558 self._ExpandAndLockInstance()
11559 self.needed_locks[locking.LEVEL_NODE] = []
11560 self.needed_locks[locking.LEVEL_NODE_RES] = []
11561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11562 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11564 def DeclareLocks(self, level):
11565 if level == locking.LEVEL_NODE:
11566 self._LockInstancesNodes()
11567 elif level == locking.LEVEL_NODE_RES:
11569 self.needed_locks[locking.LEVEL_NODE_RES] = \
11570 self.needed_locks[locking.LEVEL_NODE][:]
11572 def BuildHooksEnv(self):
11573 """Build hooks env.
11575 This runs on the master, the primary and all the secondaries.
11579 "DISK": self.op.disk,
11580 "AMOUNT": self.op.amount,
11581 "ABSOLUTE": self.op.absolute,
11583 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
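# For example (values assumed), growing disk 0 by 1024 MB in relative mode
# results in the hook environment entries DISK=0, AMOUNT=1024 and
# ABSOLUTE=False on top of the usual per-instance variables.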
11586 def BuildHooksNodes(self):
11587 """Build hooks nodes.
11590 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11593 def CheckPrereq(self):
11594 """Check prerequisites.
11596 This checks that the instance is in the cluster.
11599 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11600 assert instance is not None, \
11601 "Cannot retrieve locked instance %s" % self.op.instance_name
11602 nodenames = list(instance.all_nodes)
11603 for node in nodenames:
11604 _CheckNodeOnline(self, node)
11606 self.instance = instance
11608 if instance.disk_template not in constants.DTS_GROWABLE:
11609 raise errors.OpPrereqError("Instance's disk layout does not support"
11610 " growing", errors.ECODE_INVAL)
11612 self.disk = instance.FindDisk(self.op.disk)
11614 if self.op.absolute:
11615 self.target = self.op.amount
11616 self.delta = self.target - self.disk.size
11617 if self.delta < 0:
11618 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11619 "current disk size (%s)" %
11620 (utils.FormatUnit(self.target, "h"),
11621 utils.FormatUnit(self.disk.size, "h")),
11622 errors.ECODE_STATE)
11623 else:
11624 self.delta = self.op.amount
11625 self.target = self.disk.size + self.delta
11626 if self.delta < 0:
11627 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11628 utils.FormatUnit(self.delta, "h"),
11629 errors.ECODE_INVAL)
11631 if instance.disk_template not in (constants.DT_FILE,
11632 constants.DT_SHARED_FILE,
11634 # TODO: check the free disk space for file, when that feature will be
11636 _CheckNodesFreeDiskPerVG(self, nodenames,
11637 self.disk.ComputeGrowth(self.delta))
11639 def Exec(self, feedback_fn):
11640 """Execute disk grow.
11643 instance = self.instance
11644 disk = self.disk
11646 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11647 assert (self.owned_locks(locking.LEVEL_NODE) ==
11648 self.owned_locks(locking.LEVEL_NODE_RES))
11650 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11651 if not disks_ok:
11652 raise errors.OpExecError("Cannot activate block device to grow")
11654 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11655 (self.op.disk, instance.name,
11656 utils.FormatUnit(self.delta, "h"),
11657 utils.FormatUnit(self.target, "h")))
11659 # First run all grow ops in dry-run mode
11660 for node in instance.all_nodes:
11661 self.cfg.SetDiskID(disk, node)
11662 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11663 True)
11664 result.Raise("Grow request failed to node %s" % node)
11666 # We know that (as far as we can test) operations across different
11667 # nodes will succeed, time to run it for real
11668 for node in instance.all_nodes:
11669 self.cfg.SetDiskID(disk, node)
11670 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11671 False)
11672 result.Raise("Grow request failed to node %s" % node)
11674 # TODO: Rewrite code to work properly
11675 # DRBD goes into sync mode for a short amount of time after executing the
11676 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11677 # calling "resize" in sync mode fails. Sleeping for a short amount of
11678 # time is a work-around.
11679 time.sleep(5)
11681 disk.RecordGrow(self.delta)
11682 self.cfg.Update(instance, feedback_fn)
11684 # Changes have been recorded, release node lock
11685 _ReleaseLocks(self, locking.LEVEL_NODE)
11687 # Downgrade lock while waiting for sync
11688 self.glm.downgrade(locking.LEVEL_INSTANCE)
11690 if self.op.wait_for_sync:
11691 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11692 if disk_abort:
11693 self.proc.LogWarning("Disk sync-ing has not returned a good"
11694 " status; please check the instance")
11695 if instance.admin_state != constants.ADMINST_UP:
11696 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11697 elif instance.admin_state != constants.ADMINST_UP:
11698 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11699 " not supposed to be running because no wait for"
11700 " sync mode was requested")
11702 assert self.owned_locks(locking.LEVEL_NODE_RES)
11703 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11706 class LUInstanceQueryData(NoHooksLU):
11707 """Query runtime instance data.
11712 def ExpandNames(self):
11713 self.needed_locks = {}
11715 # Use locking if requested or when non-static information is wanted
11716 if not (self.op.static or self.op.use_locking):
11717 self.LogWarning("Non-static data requested, locks need to be acquired")
11718 self.op.use_locking = True
11720 if self.op.instances or not self.op.use_locking:
11721 # Expand instance names right here
11722 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11724 # Will use acquired locks
11725 self.wanted_names = None
11727 if self.op.use_locking:
11728 self.share_locks = _ShareAll()
11730 if self.wanted_names is None:
11731 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11733 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11735 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11736 self.needed_locks[locking.LEVEL_NODE] = []
11737 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11739 def DeclareLocks(self, level):
11740 if self.op.use_locking:
11741 if level == locking.LEVEL_NODEGROUP:
11742 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11744 # Lock all groups used by instances optimistically; this requires going
11745 # via the node before it's locked, requiring verification later on
11746 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11747 frozenset(group_uuid
11748 for instance_name in owned_instances
11749 for group_uuid in
11750 self.cfg.GetInstanceNodeGroups(instance_name))
11752 elif level == locking.LEVEL_NODE:
11753 self._LockInstancesNodes()
11755 def CheckPrereq(self):
11756 """Check prerequisites.
11758 This only checks the optional instance list against the existing names.
11761 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11762 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11763 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11765 if self.wanted_names is None:
11766 assert self.op.use_locking, "Locking was not used"
11767 self.wanted_names = owned_instances
11769 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11771 if self.op.use_locking:
11772 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11773 None)
11774 else:
11775 assert not (owned_instances or owned_groups or owned_nodes)
11777 self.wanted_instances = instances.values()
11779 def _ComputeBlockdevStatus(self, node, instance, dev):
11780 """Returns the status of a block device
11782 """
11783 if self.op.static or not node:
11784 return None
11786 self.cfg.SetDiskID(dev, node)
11788 result = self.rpc.call_blockdev_find(node, dev)
11789 if result.offline:
11790 return None
11792 result.Raise("Can't compute disk status for %s" % instance.name)
11794 status = result.payload
11795 if status is None:
11796 return None
11798 return (status.dev_path, status.major, status.minor,
11799 status.sync_percent, status.estimated_time,
11800 status.is_degraded, status.ldisk_status)
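# Illustrative return value (all numbers assumed): a DRBD device that is
# still syncing could be reported as
#   ("/dev/drbd0", 147, 0, 80.5, 120, True, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); static queries and offline nodes yield None instead.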
11802 def _ComputeDiskStatus(self, instance, snode, dev):
11803 """Compute block device status.
11806 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11808 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11810 def _ComputeDiskStatusInner(self, instance, snode, dev):
11811 """Compute block device status.
11813 @attention: The device has to be annotated already.
11816 if dev.dev_type in constants.LDS_DRBD:
11817 # we change the snode then (otherwise we use the one passed in)
11818 if dev.logical_id[0] == instance.primary_node:
11819 snode = dev.logical_id[1]
11821 snode = dev.logical_id[0]
11823 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11824 instance, dev)
11825 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11827 if dev.children:
11828 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11829 instance, snode),
11830 dev.children)
11831 else:
11832 dev_children = []
11834 return {
11835 "iv_name": dev.iv_name,
11836 "dev_type": dev.dev_type,
11837 "logical_id": dev.logical_id,
11838 "physical_id": dev.physical_id,
11839 "pstatus": dev_pstatus,
11840 "sstatus": dev_sstatus,
11841 "children": dev_children,
11842 "mode": dev.mode,
11843 "size": dev.size,
11844 }
11846 def Exec(self, feedback_fn):
11847 """Gather and return data"""
11850 cluster = self.cfg.GetClusterInfo()
11852 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11853 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11855 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11856 for node in nodes.values()))
11858 group2name_fn = lambda uuid: groups[uuid].name
11860 for instance in self.wanted_instances:
11861 pnode = nodes[instance.primary_node]
11863 if self.op.static or pnode.offline:
11864 remote_state = None
11865 if pnode.offline:
11866 self.LogWarning("Primary node %s is marked offline, returning static"
11867 " information only for instance %s" %
11868 (pnode.name, instance.name))
11869 else:
11870 remote_info = self.rpc.call_instance_info(instance.primary_node,
11871 instance.name,
11872 instance.hypervisor)
11872 instance.hypervisor)
11873 remote_info.Raise("Error checking node %s" % instance.primary_node)
11874 remote_info = remote_info.payload
11875 if remote_info and "state" in remote_info:
11876 remote_state = "up"
11878 if instance.admin_state == constants.ADMINST_UP:
11879 remote_state = "down"
11881 remote_state = instance.admin_state
11883 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11884 instance.disks)
11886 snodes_group_uuids = [nodes[snode_name].group
11887 for snode_name in instance.secondary_nodes]
11889 result[instance.name] = {
11890 "name": instance.name,
11891 "config_state": instance.admin_state,
11892 "run_state": remote_state,
11893 "pnode": instance.primary_node,
11894 "pnode_group_uuid": pnode.group,
11895 "pnode_group_name": group2name_fn(pnode.group),
11896 "snodes": instance.secondary_nodes,
11897 "snodes_group_uuids": snodes_group_uuids,
11898 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11900 # this happens to be the same format used for hooks
11901 "nics": _NICListToTuple(self, instance.nics),
11902 "disk_template": instance.disk_template,
11904 "hypervisor": instance.hypervisor,
11905 "network_port": instance.network_port,
11906 "hv_instance": instance.hvparams,
11907 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11908 "be_instance": instance.beparams,
11909 "be_actual": cluster.FillBE(instance),
11910 "os_instance": instance.osparams,
11911 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11912 "serial_no": instance.serial_no,
11913 "mtime": instance.mtime,
11914 "ctime": instance.ctime,
11915 "uuid": instance.uuid,
11921 def PrepareContainerMods(mods, private_fn):
11922 """Prepares a list of container modifications by adding a private data field.
11924 @type mods: list of tuples; (operation, index, parameters)
11925 @param mods: List of modifications
11926 @type private_fn: callable or None
11927 @param private_fn: Callable for constructing a private data field for a
11928 modification
11930 """
11932 if private_fn is None:
11933 fn = lambda: None
11934 else:
11935 fn = private_fn
11937 return [(op, idx, params, fn()) for (op, idx, params) in mods]
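# Minimal usage sketch (parameters assumed): with private_fn=None,
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, None)]; a callable
# private_fn instead attaches a fresh private object to every modification.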
11940 #: Type description for changes as returned by L{ApplyContainerMods}'s
11942 _TApplyContModsCbChanges = \
11943 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11944 ht.TNonEmptyString,
11945 ht.TAny,
11946 ])))
11949 def ApplyContainerMods(kind, container, chgdesc, mods,
11950 create_fn, modify_fn, remove_fn):
11951 """Applies descriptions in C{mods} to C{container}.
11954 @param kind: One-word item description
11955 @type container: list
11956 @param container: Container to modify
11957 @type chgdesc: None or list
11958 @param chgdesc: List of applied changes
11960 @param mods: Modifications as returned by L{PrepareContainerMods}
11961 @type create_fn: callable
11962 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11963 receives absolute item index, parameters and private data object as added
11964 by L{PrepareContainerMods}, returns tuple containing new item and changes
11966 @type modify_fn: callable
11967 @param modify_fn: Callback for modifying an existing item
11968 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11969 and private data object as added by L{PrepareContainerMods}, returns
11971 @type remove_fn: callable
11972 @param remove_fn: Callback on removing item; receives absolute item index,
11973 item and private data object as added by L{PrepareContainerMods}
11975 """
11976 for (op, idx, params, private) in mods:
11977 if idx == -1:
11978 # Append
11979 absidx = len(container) - 1
11980 elif idx < 0:
11981 raise IndexError("Not accepting negative indices other than -1")
11982 elif idx > len(container):
11983 raise IndexError("Got %s index %s, but there are only %s" %
11984 (kind, idx, len(container)))
11985 else:
11986 absidx = idx
11988 changes = None
11990 if op == constants.DDM_ADD:
11991 # Calculate where item will be added
11992 if idx == -1:
11993 addidx = len(container)
11994 else:
11995 addidx = idx
11997 if create_fn is None:
11998 item = params
11999 else:
12000 (item, changes) = create_fn(addidx, params, private)
12002 if idx == -1:
12003 container.append(item)
12004 else:
12006 assert idx <= len(container)
12007 # list.insert does so before the specified index
12008 container.insert(idx, item)
12009 else:
12010 # Retrieve existing item
12011 try:
12012 item = container[absidx]
12013 except IndexError:
12014 raise IndexError("Invalid %s index %s" % (kind, idx))
12016 if op == constants.DDM_REMOVE:
12017 assert not params
12019 if remove_fn is not None:
12020 remove_fn(absidx, item, private)
12022 changes = [("%s/%s" % (kind, absidx), "remove")]
12024 assert container[absidx] == item
12025 del container[absidx]
12026 elif op == constants.DDM_MODIFY:
12027 if modify_fn is not None:
12028 changes = modify_fn(absidx, item, params, private)
12029 else:
12030 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12032 assert _TApplyContModsCbChanges(changes)
12034 if not (chgdesc is None or changes is None):
12035 chgdesc.extend(changes)
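# Hypothetical call mirroring how LUInstanceSetParams uses this helper
# further below: ApplyContainerMods("disk", disks, None, diskmod,
# None, None, None) only checks the indices of the requested disk changes,
# while passing create/modify/remove callbacks additionally applies them and
# collects the change descriptions in chgdesc.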
12038 def _UpdateIvNames(base_index, disks):
12039 """Updates the C{iv_name} attribute of disks.
12041 @type disks: list of L{objects.Disk}
12044 for (idx, disk) in enumerate(disks):
12045 disk.iv_name = "disk/%s" % (base_index + idx, )
12048 class _InstNicModPrivate:
12049 """Data structure for network interface modifications.
12051 Used by L{LUInstanceSetParams}.
12054 def __init__(self):
12059 class LUInstanceSetParams(LogicalUnit):
12060 """Modifies an instances's parameters.
12063 HPATH = "instance-modify"
12064 HTYPE = constants.HTYPE_INSTANCE
12065 REQ_BGL = False
12067 @staticmethod
12068 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12069 assert ht.TList(mods)
12070 assert not mods or len(mods[0]) in (2, 3)
12072 if mods and len(mods[0]) == 2:
12073 result = []
12074 addremove = 0
12076 for op, params in mods:
12077 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12078 result.append((op, -1, params))
12079 addremove += 1
12081 if addremove > 1:
12082 raise errors.OpPrereqError("Only one %s add or remove operation is"
12083 " supported at a time" % kind,
12084 errors.ECODE_INVAL)
12086 result.append((constants.DDM_MODIFY, op, params))
12088 assert verify_fn(result)
12089 else:
12090 result = mods
12092 return result
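# Illustrative conversion (parameters assumed): the old-style two-element
# form [(constants.DDM_ADD, {"size": 1024})] becomes
# [(constants.DDM_ADD, -1, {"size": 1024})], while an index-based entry such
# as [(0, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 0, {"mode": "ro"})].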
12095 def _CheckMods(kind, mods, key_types, item_fn):
12096 """Ensures requested disk/NIC modifications are valid.
12099 for (op, _, params) in mods:
12100 assert ht.TDict(params)
12102 utils.ForceDictType(params, key_types)
12104 if op == constants.DDM_REMOVE:
12106 raise errors.OpPrereqError("No settings should be passed when"
12107 " removing a %s" % kind,
12108 errors.ECODE_INVAL)
12109 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12110 item_fn(op, params)
12112 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12115 def _VerifyDiskModification(op, params):
12116 """Verifies a disk modification.
12119 if op == constants.DDM_ADD:
12120 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12121 if mode not in constants.DISK_ACCESS_SET:
12122 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12123 errors.ECODE_INVAL)
12125 size = params.get(constants.IDISK_SIZE, None)
12126 if size is None:
12127 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12128 constants.IDISK_SIZE, errors.ECODE_INVAL)
12130 try:
12131 size = int(size)
12132 except (TypeError, ValueError), err:
12133 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12134 errors.ECODE_INVAL)
12136 params[constants.IDISK_SIZE] = size
12138 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12139 raise errors.OpPrereqError("Disk size change not possible, use"
12140 " grow-disk", errors.ECODE_INVAL)
12143 def _VerifyNicModification(op, params):
12144 """Verifies a network interface modification.
12147 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12148 ip = params.get(constants.INIC_IP, None)
12149 if ip is None:
12150 pass
12151 elif ip.lower() == constants.VALUE_NONE:
12152 params[constants.INIC_IP] = None
12153 elif not netutils.IPAddress.IsValid(ip):
12154 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12155 errors.ECODE_INVAL)
12157 bridge = params.get("bridge", None)
12158 link = params.get(constants.INIC_LINK, None)
12159 if bridge and link:
12160 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12161 " at the same time", errors.ECODE_INVAL)
12162 elif bridge and bridge.lower() == constants.VALUE_NONE:
12163 params["bridge"] = None
12164 elif link and link.lower() == constants.VALUE_NONE:
12165 params[constants.INIC_LINK] = None
12167 if op == constants.DDM_ADD:
12168 macaddr = params.get(constants.INIC_MAC, None)
12169 if macaddr is None:
12170 params[constants.INIC_MAC] = constants.VALUE_AUTO
12172 if constants.INIC_MAC in params:
12173 macaddr = params[constants.INIC_MAC]
12174 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12175 macaddr = utils.NormalizeAndValidateMac(macaddr)
12177 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12178 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12179 " modifying an existing NIC",
12180 errors.ECODE_INVAL)
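# A typical NIC addition could look like (values assumed)
#   {constants.INIC_IP: "192.0.2.10", constants.INIC_LINK: "br0",
#    constants.INIC_MAC: constants.VALUE_AUTO}
# "bridge" is still accepted as a legacy alias for the link, but not together
# with an explicit link value.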
12182 def CheckArguments(self):
12183 if not (self.op.nics or self.op.disks or self.op.disk_template or
12184 self.op.hvparams or self.op.beparams or self.op.os_name or
12185 self.op.offline is not None or self.op.runtime_mem):
12186 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12188 if self.op.hvparams:
12189 _CheckGlobalHvParams(self.op.hvparams)
12192 self._UpgradeDiskNicMods("disk", self.op.disks,
12193 opcodes.OpInstanceSetParams.TestDiskModifications)
12195 self._UpgradeDiskNicMods("NIC", self.op.nics,
12196 opcodes.OpInstanceSetParams.TestNicModifications)
12198 # Check disk modifications
12199 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12200 self._VerifyDiskModification)
12202 if self.op.disks and self.op.disk_template is not None:
12203 raise errors.OpPrereqError("Disk template conversion and other disk"
12204 " changes not supported at the same time",
12205 errors.ECODE_INVAL)
12207 if (self.op.disk_template and
12208 self.op.disk_template in constants.DTS_INT_MIRROR and
12209 self.op.remote_node is None):
12210 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12211 " one requires specifying a secondary node",
12212 errors.ECODE_INVAL)
12214 # Check NIC modifications
12215 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12216 self._VerifyNicModification)
12218 def ExpandNames(self):
12219 self._ExpandAndLockInstance()
12220 # Can't even acquire node locks in shared mode as upcoming changes in
12221 # Ganeti 2.6 will start to modify the node object on disk conversion
12222 self.needed_locks[locking.LEVEL_NODE] = []
12223 self.needed_locks[locking.LEVEL_NODE_RES] = []
12224 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12226 def DeclareLocks(self, level):
12227 # TODO: Acquire group lock in shared mode (disk parameters)
12228 if level == locking.LEVEL_NODE:
12229 self._LockInstancesNodes()
12230 if self.op.disk_template and self.op.remote_node:
12231 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12232 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12233 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12235 self.needed_locks[locking.LEVEL_NODE_RES] = \
12236 self.needed_locks[locking.LEVEL_NODE][:]
12238 def BuildHooksEnv(self):
12239 """Build hooks env.
12241 This runs on the master, primary and secondaries.
12245 if constants.BE_MINMEM in self.be_new:
12246 args["minmem"] = self.be_new[constants.BE_MINMEM]
12247 if constants.BE_MAXMEM in self.be_new:
12248 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12249 if constants.BE_VCPUS in self.be_new:
12250 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12251 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12252 # information at all.
12254 if self._new_nics is not None:
12257 for nic in self._new_nics:
12258 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12259 mode = nicparams[constants.NIC_MODE]
12260 link = nicparams[constants.NIC_LINK]
12261 nics.append((nic.ip, nic.mac, mode, link))
12263 args["nics"] = nics
12265 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12266 if self.op.disk_template:
12267 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12268 if self.op.runtime_mem:
12269 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12273 def BuildHooksNodes(self):
12274 """Build hooks nodes.
12277 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12280 def _PrepareNicModification(self, params, private, old_ip, old_params,
12281 cluster, pnode):
12282 update_params_dict = dict([(key, params[key])
12283 for key in constants.NICS_PARAMETERS
12284 if key in params])
12286 if "bridge" in params:
12287 update_params_dict[constants.NIC_LINK] = params["bridge"]
12289 new_params = _GetUpdatedParams(old_params, update_params_dict)
12290 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12292 new_filled_params = cluster.SimpleFillNIC(new_params)
12293 objects.NIC.CheckParameterSyntax(new_filled_params)
12295 new_mode = new_filled_params[constants.NIC_MODE]
12296 if new_mode == constants.NIC_MODE_BRIDGED:
12297 bridge = new_filled_params[constants.NIC_LINK]
12298 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12299 if msg:
12300 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12301 if self.op.force:
12302 self.warn.append(msg)
12303 else:
12304 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12306 elif new_mode == constants.NIC_MODE_ROUTED:
12307 ip = params.get(constants.INIC_IP, old_ip)
12308 if ip is None:
12309 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12310 " on a routed NIC", errors.ECODE_INVAL)
12312 if constants.INIC_MAC in params:
12313 mac = params[constants.INIC_MAC]
12315 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12316 errors.ECODE_INVAL)
12317 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12318 # otherwise generate the MAC address
12319 params[constants.INIC_MAC] = \
12320 self.cfg.GenerateMAC(self.proc.GetECId())
12321 else:
12322 # or validate/reserve the current one
12323 try:
12324 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12325 except errors.ReservationError:
12326 raise errors.OpPrereqError("MAC address '%s' already in use"
12327 " in cluster" % mac,
12328 errors.ECODE_NOTUNIQUE)
12330 private.params = new_params
12331 private.filled = new_filled_params
12333 return (None, None)
12335 def CheckPrereq(self):
12336 """Check prerequisites.
12338 This only checks the instance list against the existing names.
12341 # checking the new params on the primary/secondary nodes
12343 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12344 cluster = self.cluster = self.cfg.GetClusterInfo()
12345 assert self.instance is not None, \
12346 "Cannot retrieve locked instance %s" % self.op.instance_name
12347 pnode = instance.primary_node
12348 nodelist = list(instance.all_nodes)
12349 pnode_info = self.cfg.GetNodeInfo(pnode)
12350 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12352 # Prepare disk/NIC modifications
12353 self.diskmod = PrepareContainerMods(self.op.disks, None)
12354 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12357 if self.op.os_name and not self.op.force:
12358 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12359 self.op.force_variant)
12360 instance_os = self.op.os_name
12362 instance_os = instance.os
12364 assert not (self.op.disk_template and self.op.disks), \
12365 "Can't modify disk template and apply disk changes at the same time"
12367 if self.op.disk_template:
12368 if instance.disk_template == self.op.disk_template:
12369 raise errors.OpPrereqError("Instance already has disk template %s" %
12370 instance.disk_template, errors.ECODE_INVAL)
12372 if (instance.disk_template,
12373 self.op.disk_template) not in self._DISK_CONVERSIONS:
12374 raise errors.OpPrereqError("Unsupported disk template conversion from"
12375 " %s to %s" % (instance.disk_template,
12376 self.op.disk_template),
12377 errors.ECODE_INVAL)
12378 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12379 msg="cannot change disk template")
12380 if self.op.disk_template in constants.DTS_INT_MIRROR:
12381 if self.op.remote_node == pnode:
12382 raise errors.OpPrereqError("Given new secondary node %s is the same"
12383 " as the primary node of the instance" %
12384 self.op.remote_node, errors.ECODE_STATE)
12385 _CheckNodeOnline(self, self.op.remote_node)
12386 _CheckNodeNotDrained(self, self.op.remote_node)
12387 # FIXME: here we assume that the old instance type is DT_PLAIN
12388 assert instance.disk_template == constants.DT_PLAIN
12389 disks = [{constants.IDISK_SIZE: d.size,
12390 constants.IDISK_VG: d.logical_id[0]}
12391 for d in instance.disks]
12392 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12393 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12395 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12396 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12397 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12398 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12399 ignore=self.op.ignore_ipolicy)
12400 if pnode_info.group != snode_info.group:
12401 self.LogWarning("The primary and secondary nodes are in two"
12402 " different node groups; the disk parameters"
12403 " from the first disk's node group will be"
12406 # hvparams processing
12407 if self.op.hvparams:
12408 hv_type = instance.hypervisor
12409 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12410 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12411 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12414 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12415 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12416 self.hv_proposed = self.hv_new = hv_new # the new actual values
12417 self.hv_inst = i_hvdict # the new dict (without defaults)
12419 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12420 instance.hvparams)
12421 self.hv_new = self.hv_inst = {}
12423 # beparams processing
12424 if self.op.beparams:
12425 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12427 objects.UpgradeBeParams(i_bedict)
12428 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12429 be_new = cluster.SimpleFillBE(i_bedict)
12430 self.be_proposed = self.be_new = be_new # the new actual values
12431 self.be_inst = i_bedict # the new dict (without defaults)
12432 else:
12433 self.be_new = self.be_inst = {}
12434 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12435 be_old = cluster.FillBE(instance)
12437 # CPU param validation -- checking every time a parameter is
12438 # changed to cover all cases where either CPU mask or vcpus have
12440 if (constants.BE_VCPUS in self.be_proposed and
12441 constants.HV_CPU_MASK in self.hv_proposed):
12442 cpu_list = \
12443 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12444 # Verify mask is consistent with number of vCPUs. Can skip this
12445 # test if only 1 entry in the CPU mask, which means same mask
12446 # is applied to all vCPUs.
12447 if (len(cpu_list) > 1 and
12448 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12449 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12451 (self.be_proposed[constants.BE_VCPUS],
12452 self.hv_proposed[constants.HV_CPU_MASK]),
12453 errors.ECODE_INVAL)
12455 # Only perform this test if a new CPU mask is given
12456 if constants.HV_CPU_MASK in self.hv_new:
12457 # Calculate the largest CPU number requested
12458 max_requested_cpu = max(map(max, cpu_list))
12459 # Check that all of the instance's nodes have enough physical CPUs to
12460 # satisfy the requested CPU mask
12461 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12462 max_requested_cpu + 1, instance.hypervisor)
12464 # osparams processing
12465 if self.op.osparams:
12466 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12467 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12468 self.os_inst = i_osdict # the new dict (without defaults)
12469 else:
12470 self.os_inst = {}
12472 self.warn = []
12474 #TODO(dynmem): do the appropriate check involving MINMEM
12475 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12476 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12477 mem_check_list = [pnode]
12478 if be_new[constants.BE_AUTO_BALANCE]:
12479 # either we changed auto_balance to yes or it was from before
12480 mem_check_list.extend(instance.secondary_nodes)
12481 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12482 instance.hypervisor)
12483 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12484 [instance.hypervisor])
12485 pninfo = nodeinfo[pnode]
12486 msg = pninfo.fail_msg
12487 if msg:
12488 # Assume the primary node is unreachable and go ahead
12489 self.warn.append("Can't get info from primary node %s: %s" %
12490 (pnode, msg))
12491 else:
12492 (_, _, (pnhvinfo, )) = pninfo.payload
12493 if not isinstance(pnhvinfo.get("memory_free", None), int):
12494 self.warn.append("Node data from primary node %s doesn't contain"
12495 " free memory information" % pnode)
12496 elif instance_info.fail_msg:
12497 self.warn.append("Can't get instance runtime information: %s" %
12498 instance_info.fail_msg)
12499 else:
12500 if instance_info.payload:
12501 current_mem = int(instance_info.payload["memory"])
12502 else:
12503 # Assume instance not running
12504 # (there is a slight race condition here, but it's not very
12505 # probable, and we have no other way to check)
12506 # TODO: Describe race condition
12507 current_mem = 0
12508 #TODO(dynmem): do the appropriate check involving MINMEM
12509 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12510 pnhvinfo["memory_free"])
12512 raise errors.OpPrereqError("This change will prevent the instance"
12513 " from starting, due to %d MB of memory"
12514 " missing on its primary node" %
12516 errors.ECODE_NORES)
12518 if be_new[constants.BE_AUTO_BALANCE]:
12519 for node, nres in nodeinfo.items():
12520 if node not in instance.secondary_nodes:
12521 continue
12522 nres.Raise("Can't get info from secondary node %s" % node,
12523 prereq=True, ecode=errors.ECODE_STATE)
12524 (_, _, (nhvinfo, )) = nres.payload
12525 if not isinstance(nhvinfo.get("memory_free", None), int):
12526 raise errors.OpPrereqError("Secondary node %s didn't return free"
12527 " memory information" % node,
12528 errors.ECODE_STATE)
12529 #TODO(dynmem): do the appropriate check involving MINMEM
12530 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12531 raise errors.OpPrereqError("This change will prevent the instance"
12532 " from failover to its secondary node"
12533 " %s, due to not enough memory" % node,
12534 errors.ECODE_STATE)
12536 if self.op.runtime_mem:
12537 remote_info = self.rpc.call_instance_info(instance.primary_node,
12538 instance.name,
12539 instance.hypervisor)
12540 remote_info.Raise("Error checking node %s" % instance.primary_node)
12541 if not remote_info.payload: # not running already
12542 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12543 errors.ECODE_STATE)
12545 current_memory = remote_info.payload["memory"]
12546 if (not self.op.force and
12547 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12548 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12549 raise errors.OpPrereqError("Instance %s must have memory between %d"
12550 " and %d MB of memory unless --force is"
12551 " given" % (instance.name,
12552 self.be_proposed[constants.BE_MINMEM],
12553 self.be_proposed[constants.BE_MAXMEM]),
12554 errors.ECODE_INVAL)
12556 if self.op.runtime_mem > current_memory:
12557 _CheckNodeFreeMemory(self, instance.primary_node,
12558 "ballooning memory for instance %s" %
12559 instance.name,
12560 self.op.runtime_mem - current_memory,
12561 instance.hypervisor)
12563 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12564 raise errors.OpPrereqError("Disk operations not supported for"
12565 " diskless instances",
12566 errors.ECODE_INVAL)
12568 def _PrepareNicCreate(_, params, private):
12569 return self._PrepareNicModification(params, private, None, {},
12570 cluster, pnode)
12572 def _PrepareNicMod(_, nic, params, private):
12573 return self._PrepareNicModification(params, private, nic.ip,
12574 nic.nicparams, cluster, pnode)
12576 # Verify NIC changes (operating on copy)
12577 nics = instance.nics[:]
12578 ApplyContainerMods("NIC", nics, None, self.nicmod,
12579 _PrepareNicCreate, _PrepareNicMod, None)
12580 if len(nics) > constants.MAX_NICS:
12581 raise errors.OpPrereqError("Instance has too many network interfaces"
12582 " (%d), cannot add more" % constants.MAX_NICS,
12583 errors.ECODE_STATE)
12585 # Verify disk changes (operating on a copy)
12586 disks = instance.disks[:]
12587 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12588 if len(disks) > constants.MAX_DISKS:
12589 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12590 " more" % constants.MAX_DISKS,
12591 errors.ECODE_STATE)
12593 if self.op.offline is not None:
12594 if self.op.offline:
12595 msg = "can't change to offline"
12597 msg = "can't change to online"
12598 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12600 # Pre-compute NIC changes (necessary to use result in hooks)
12601 self._nic_chgdesc = []
12602 if self.nicmod:
12603 # Operate on copies as this is still in prereq
12604 nics = [nic.Copy() for nic in instance.nics]
12605 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12606 self._CreateNewNic, self._ApplyNicMods, None)
12607 self._new_nics = nics
12608 else:
12609 self._new_nics = None
12611 def _ConvertPlainToDrbd(self, feedback_fn):
12612 """Converts an instance from plain to drbd.
12615 feedback_fn("Converting template to drbd")
12616 instance = self.instance
12617 pnode = instance.primary_node
12618 snode = self.op.remote_node
12620 assert instance.disk_template == constants.DT_PLAIN
12622 # create a fake disk info for _GenerateDiskTemplate
12623 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12624 constants.IDISK_VG: d.logical_id[0]}
12625 for d in instance.disks]
12626 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12627 instance.name, pnode, [snode],
12628 disk_info, None, None, 0, feedback_fn,
12629 self.diskparams)
12630 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12631 self.diskparams)
12632 info = _GetInstanceInfoText(instance)
12633 feedback_fn("Creating additional volumes...")
12634 # first, create the missing data and meta devices
12635 for disk in anno_disks:
12636 # unfortunately this is... not too nice
12637 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12638 info, True)
12639 for child in disk.children:
12640 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12641 # at this stage, all new LVs have been created, we can rename the
12642 # old ones
12643 feedback_fn("Renaming original volumes...")
12644 rename_list = [(o, n.children[0].logical_id)
12645 for (o, n) in zip(instance.disks, new_disks)]
12646 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12647 result.Raise("Failed to rename original LVs")
12649 feedback_fn("Initializing DRBD devices...")
12650 # all child devices are in place, we can now create the DRBD devices
12651 for disk in anno_disks:
12652 for node in [pnode, snode]:
12653 f_create = node == pnode
12654 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12656 # at this point, the instance has been modified
12657 instance.disk_template = constants.DT_DRBD8
12658 instance.disks = new_disks
12659 self.cfg.Update(instance, feedback_fn)
12661 # Release node locks while waiting for sync
12662 _ReleaseLocks(self, locking.LEVEL_NODE)
12664 # disks are created, waiting for sync
12665 disk_abort = not _WaitForSync(self, instance,
12666 oneshot=not self.op.wait_for_sync)
12667 if disk_abort:
12668 raise errors.OpExecError("There are some degraded disks for"
12669 " this instance, please cleanup manually")
12671 # Node resource locks will be released by caller
12673 def _ConvertDrbdToPlain(self, feedback_fn):
12674 """Converts an instance from drbd to plain.
12677 instance = self.instance
12679 assert len(instance.secondary_nodes) == 1
12680 assert instance.disk_template == constants.DT_DRBD8
12682 pnode = instance.primary_node
12683 snode = instance.secondary_nodes[0]
12684 feedback_fn("Converting template to plain")
12686 old_disks = instance.disks
12687 new_disks = [d.children[0] for d in old_disks]
12689 # copy over size and mode
12690 for parent, child in zip(old_disks, new_disks):
12691 child.size = parent.size
12692 child.mode = parent.mode
12694 # this is a DRBD disk, return its port to the pool
12695 # NOTE: this must be done right before the call to cfg.Update!
12696 for disk in old_disks:
12697 tcp_port = disk.logical_id[2]
12698 self.cfg.AddTcpUdpPort(tcp_port)
12700 # update instance structure
12701 instance.disks = new_disks
12702 instance.disk_template = constants.DT_PLAIN
12703 self.cfg.Update(instance, feedback_fn)
12705 # Release locks in case removing disks takes a while
12706 _ReleaseLocks(self, locking.LEVEL_NODE)
12708 feedback_fn("Removing volumes on the secondary node...")
12709 for disk in old_disks:
12710 self.cfg.SetDiskID(disk, snode)
12711 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12712 if msg:
12713 self.LogWarning("Could not remove block device %s on node %s,"
12714 " continuing anyway: %s", disk.iv_name, snode, msg)
12716 feedback_fn("Removing unneeded volumes on the primary node...")
12717 for idx, disk in enumerate(old_disks):
12718 meta = disk.children[1]
12719 self.cfg.SetDiskID(meta, pnode)
12720 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12721 if msg:
12722 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12723 " continuing anyway: %s", idx, pnode, msg)
12725 def _CreateNewDisk(self, idx, params, _):
12726 """Creates a new disk.
12729 instance = self.instance
12732 if instance.disk_template in constants.DTS_FILEBASED:
12733 (file_driver, file_path) = instance.disks[0].logical_id
12734 file_path = os.path.dirname(file_path)
12735 else:
12736 file_driver = file_path = None
12738 disk = \
12739 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12740 instance.primary_node, instance.secondary_nodes,
12741 [params], file_path, file_driver, idx,
12742 self.Log, self.diskparams)[0]
12744 info = _GetInstanceInfoText(instance)
12746 logging.info("Creating volume %s for instance %s",
12747 disk.iv_name, instance.name)
12748 # Note: this needs to be kept in sync with _CreateDisks
12750 for node in instance.all_nodes:
12751 f_create = (node == instance.primary_node)
12752 try:
12753 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12754 except errors.OpExecError, err:
12755 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12756 disk.iv_name, disk, node, err)
12759 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12762 @staticmethod
12763 def _ModifyDisk(idx, disk, params, _):
12764 """Modifies a disk.
12767 disk.mode = params[constants.IDISK_MODE]
12770 ("disk.mode/%d" % idx, disk.mode),
12773 def _RemoveDisk(self, idx, root, _):
12777 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12778 self.cfg.SetDiskID(disk, node)
12779 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12780 if msg:
12781 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12782 " continuing anyway", idx, node, msg)
12784 # if this is a DRBD disk, return its port to the pool
12785 if root.dev_type in constants.LDS_DRBD:
12786 self.cfg.AddTcpUdpPort(root.logical_id[2])
12788 @staticmethod
12789 def _CreateNewNic(idx, params, private):
12790 """Creates data structure for a new network interface.
12793 mac = params[constants.INIC_MAC]
12794 ip = params.get(constants.INIC_IP, None)
12795 nicparams = private.params
12797 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12799 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12800 (mac, ip, private.filled[constants.NIC_MODE],
12801 private.filled[constants.NIC_LINK])),
12804 @staticmethod
12805 def _ApplyNicMods(idx, nic, params, private):
12806 """Modifies a network interface.
12810 changes = []
12811 for key in [constants.INIC_MAC, constants.INIC_IP]:
12812 if key in params:
12813 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12814 setattr(nic, key, params[key])
12816 if private.params:
12817 nic.nicparams = private.params
12819 for (key, val) in params.items():
12820 changes.append(("nic.%s/%d" % (key, idx), val))
12822 return changes
12824 def Exec(self, feedback_fn):
12825 """Modifies an instance.
12827 All parameters take effect only at the next restart of the instance.
12830 # Process here the warnings from CheckPrereq, as we don't have a
12831 # feedback_fn there.
12832 # TODO: Replace with self.LogWarning
12833 for warn in self.warn:
12834 feedback_fn("WARNING: %s" % warn)
12836 assert ((self.op.disk_template is None) ^
12837 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12838 "Not owning any node resource locks"
12840 result = []
12841 instance = self.instance
12844 if self.op.runtime_mem:
12845 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12846 instance,
12847 self.op.runtime_mem)
12848 rpcres.Raise("Cannot modify instance runtime memory")
12849 result.append(("runtime_memory", self.op.runtime_mem))
12851 # Apply disk changes
12852 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12853 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12854 _UpdateIvNames(0, instance.disks)
12856 if self.op.disk_template:
12858 check_nodes = set(instance.all_nodes)
12859 if self.op.remote_node:
12860 check_nodes.add(self.op.remote_node)
12861 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12862 owned = self.owned_locks(level)
12863 assert not (check_nodes - owned), \
12864 ("Not owning the correct locks, owning %r, expected at least %r" %
12865 (owned, check_nodes))
12867 r_shut = _ShutdownInstanceDisks(self, instance)
12868 if not r_shut:
12869 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12870 " proceed with disk template conversion")
12871 mode = (instance.disk_template, self.op.disk_template)
12873 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12875 self.cfg.ReleaseDRBDMinors(instance.name)
12877 result.append(("disk_template", self.op.disk_template))
12879 assert instance.disk_template == self.op.disk_template, \
12880 ("Expected disk template '%s', found '%s'" %
12881 (self.op.disk_template, instance.disk_template))
12883 # Release node and resource locks if there are any (they might already have
12884 # been released during disk conversion)
12885 _ReleaseLocks(self, locking.LEVEL_NODE)
12886 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12888 # Apply NIC changes
12889 if self._new_nics is not None:
12890 instance.nics = self._new_nics
12891 result.extend(self._nic_chgdesc)
12894 if self.op.hvparams:
12895 instance.hvparams = self.hv_inst
12896 for key, val in self.op.hvparams.iteritems():
12897 result.append(("hv/%s" % key, val))
12900 if self.op.beparams:
12901 instance.beparams = self.be_inst
12902 for key, val in self.op.beparams.iteritems():
12903 result.append(("be/%s" % key, val))
12906 if self.op.os_name:
12907 instance.os = self.op.os_name
12910 if self.op.osparams:
12911 instance.osparams = self.os_inst
12912 for key, val in self.op.osparams.iteritems():
12913 result.append(("os/%s" % key, val))
12915 if self.op.offline is None:
12916 # Ignore
12917 pass
12918 elif self.op.offline:
12919 # Mark instance as offline
12920 self.cfg.MarkInstanceOffline(instance.name)
12921 result.append(("admin_state", constants.ADMINST_OFFLINE))
12922 else:
12923 # Mark instance as online, but stopped
12924 self.cfg.MarkInstanceDown(instance.name)
12925 result.append(("admin_state", constants.ADMINST_DOWN))
12927 self.cfg.Update(instance, feedback_fn)
12929 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12930 self.owned_locks(locking.LEVEL_NODE)), \
12931 "All node locks should have been released by now"
12935 _DISK_CONVERSIONS = {
12936 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12937 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12938 }
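# Annotation (sketch, not original code): Exec() dispatches template
# conversions through this map, so only the listed (old, new) pairs are
# supported:
#
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)   # missing key -> unsupported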
12941 class LUInstanceChangeGroup(LogicalUnit):
12942 HPATH = "instance-change-group"
12943 HTYPE = constants.HTYPE_INSTANCE
12946 def ExpandNames(self):
12947 self.share_locks = _ShareAll()
12948 self.needed_locks = {
12949 locking.LEVEL_NODEGROUP: [],
12950 locking.LEVEL_NODE: [],
12953 self._ExpandAndLockInstance()
12955 if self.op.target_groups:
12956 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12957 self.op.target_groups)
12958 else:
12959 self.req_target_uuids = None
12961 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12963 def DeclareLocks(self, level):
12964 if level == locking.LEVEL_NODEGROUP:
12965 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12967 if self.req_target_uuids:
12968 lock_groups = set(self.req_target_uuids)
12970 # Lock all groups used by instance optimistically; this requires going
12971 # via the node before it's locked, requiring verification later on
12972 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12973 lock_groups.update(instance_groups)
12974 else:
12975 # No target groups, need to lock all of them
12976 lock_groups = locking.ALL_SET
12978 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12980 elif level == locking.LEVEL_NODE:
12981 if self.req_target_uuids:
12982 # Lock all nodes used by instances
12983 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12984 self._LockInstancesNodes()
12986 # Lock all nodes in all potential target groups
12987 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12988 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12989 member_nodes = [node_name
12990 for group in lock_groups
12991 for node_name in self.cfg.GetNodeGroup(group).members]
12992 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12993 else:
12994 # Lock all nodes as all groups are potential targets
12995 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
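# Annotation on the optimistic locking pattern above (not original code):
# the group locks are computed from configuration data read *before* the
# locks are held, so CheckPrereq() has to re-validate that assumption once
# the locks are actually acquired, along the lines of:
#
#   owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
#   inst_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
#   assert owned_groups.issuperset(inst_groups), \
#     "Instance changed node groups while locks were being acquired"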
12997 def CheckPrereq(self):
12998 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12999 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13000 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13002 assert (self.req_target_uuids is None or
13003 owned_groups.issuperset(self.req_target_uuids))
13004 assert owned_instances == set([self.op.instance_name])
13006 # Get instance information
13007 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13009 # Check if node groups for locked instance are still correct
13010 assert owned_nodes.issuperset(self.instance.all_nodes), \
13011 ("Instance %s's nodes changed while we kept the lock" %
13012 self.op.instance_name)
13014 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13017 if self.req_target_uuids:
13018 # User requested specific target groups
13019 self.target_uuids = frozenset(self.req_target_uuids)
13021 # All groups except those used by the instance are potential targets
13022 self.target_uuids = owned_groups - inst_groups
13024 conflicting_groups = self.target_uuids & inst_groups
13025 if conflicting_groups:
13026 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13027 " used by the instance '%s'" %
13028 (utils.CommaJoin(conflicting_groups),
13029 self.op.instance_name),
13030 errors.ECODE_INVAL)
13032 if not self.target_uuids:
13033 raise errors.OpPrereqError("There are no possible target groups",
13034 errors.ECODE_INVAL)
13036 def BuildHooksEnv(self):
13037 """Build hooks env.
13040 assert self.target_uuids
13043 "TARGET_GROUPS": " ".join(self.target_uuids),
13046 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13050 def BuildHooksNodes(self):
13051 """Build hooks nodes.
13054 mn = self.cfg.GetMasterNode()
13055 return ([mn], [mn])
13057 def Exec(self, feedback_fn):
13058 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13060 assert instances == [self.op.instance_name], "Instance not locked"
13062 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13063 instances=instances, target_groups=list(self.target_uuids))
13065 ial.Run(self.op.iallocator)
13067 if not ial.success:
13068 raise errors.OpPrereqError("Can't compute solution for changing group of"
13069 " instance '%s' using iallocator '%s': %s" %
13070 (self.op.instance_name, self.op.iallocator,
13071 ial.info),
13072 errors.ECODE_NORES)
13074 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13076 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13077 " instance '%s'", len(jobs), self.op.instance_name)
13079 return ResultWithJobs(jobs)
13082 class LUBackupQuery(NoHooksLU):
13083 """Query the exports list
13088 def CheckArguments(self):
13089 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13090 ["node", "export"], self.op.use_locking)
13092 def ExpandNames(self):
13093 self.expq.ExpandNames(self)
13095 def DeclareLocks(self, level):
13096 self.expq.DeclareLocks(self, level)
13098 def Exec(self, feedback_fn):
13099 result = {}
13101 for (node, expname) in self.expq.OldStyleQuery(self):
13102 if expname is None:
13103 result[node] = False
13104 else:
13105 result.setdefault(node, []).append(expname)
13107 return result
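# Annotation (illustrative example of the old-style result built above):
#
#   {
#     "node1.example.com": ["instance1.example.com", "instance2.example.com"],
#     "node2.example.com": False,    # export list could not be retrieved
#   }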
13110 class _ExportQuery(_QueryBase):
13111 FIELDS = query.EXPORT_FIELDS
13113 #: The node name is not a unique key for this query
13114 SORT_FIELD = "node"
13116 def ExpandNames(self, lu):
13117 lu.needed_locks = {}
13119 # The following variables interact with _QueryBase._GetNames
13120 if self.names:
13121 self.wanted = _GetWantedNodes(lu, self.names)
13122 else:
13123 self.wanted = locking.ALL_SET
13125 self.do_locking = self.use_locking
13127 if self.do_locking:
13128 lu.share_locks = _ShareAll()
13129 lu.needed_locks = {
13130 locking.LEVEL_NODE: self.wanted,
13133 def DeclareLocks(self, lu, level):
13136 def _GetQueryData(self, lu):
13137 """Computes the list of nodes and their attributes.
13140 # Locking is not used
13142 assert not (compat.any(lu.glm.is_owned(level)
13143 for level in locking.LEVELS
13144 if level != locking.LEVEL_CLUSTER) or
13145 self.do_locking or self.use_locking)
13147 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13149 result = []
13151 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13152 if nres.fail_msg:
13153 result.append((node, None))
13154 else:
13155 result.extend((node, expname) for expname in nres.payload)
13157 return result
13160 class LUBackupPrepare(NoHooksLU):
13161 """Prepares an instance for an export and returns useful information.
13166 def ExpandNames(self):
13167 self._ExpandAndLockInstance()
13169 def CheckPrereq(self):
13170 """Check prerequisites.
13173 instance_name = self.op.instance_name
13175 self.instance = self.cfg.GetInstanceInfo(instance_name)
13176 assert self.instance is not None, \
13177 "Cannot retrieve locked instance %s" % self.op.instance_name
13178 _CheckNodeOnline(self, self.instance.primary_node)
13180 self._cds = _GetClusterDomainSecret()
13182 def Exec(self, feedback_fn):
13183 """Prepares an instance for an export.
13186 instance = self.instance
13188 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13189 salt = utils.GenerateSecret(8)
13191 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13192 result = self.rpc.call_x509_cert_create(instance.primary_node,
13193 constants.RIE_CERT_VALIDITY)
13194 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13196 (name, cert_pem) = result.payload
13198 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13202 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13203 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13205 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13211 class LUBackupExport(LogicalUnit):
13212 """Export an instance to an image in the cluster.
13215 HPATH = "instance-export"
13216 HTYPE = constants.HTYPE_INSTANCE
13219 def CheckArguments(self):
13220 """Check the arguments.
13223 self.x509_key_name = self.op.x509_key_name
13224 self.dest_x509_ca_pem = self.op.destination_x509_ca
13226 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13227 if not self.x509_key_name:
13228 raise errors.OpPrereqError("Missing X509 key name for encryption",
13229 errors.ECODE_INVAL)
13231 if not self.dest_x509_ca_pem:
13232 raise errors.OpPrereqError("Missing destination X509 CA",
13233 errors.ECODE_INVAL)
13235 def ExpandNames(self):
13236 self._ExpandAndLockInstance()
13238 # Lock all nodes for local exports
13239 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13240 # FIXME: lock only instance primary and destination node
13242 # Sad but true: for now we have to lock all nodes, as we don't know where
13243 # the previous export might be, and in this LU we search for it and
13244 # remove it from its current node. In the future we could fix this by:
13245 # - making a tasklet to search (share-lock all), then create the
13246 # new one, then one to remove, after
13247 # - removing the removal operation altogether
13248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13250 def DeclareLocks(self, level):
13251 """Last minute lock declaration."""
13252 # All nodes are locked anyway, so nothing to do here.
13254 def BuildHooksEnv(self):
13255 """Build hooks env.
13257 This will run on the master, primary node and target node.
13261 "EXPORT_MODE": self.op.mode,
13262 "EXPORT_NODE": self.op.target_node,
13263 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13264 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13265 # TODO: Generic function for boolean env variables
13266 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13269 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13273 def BuildHooksNodes(self):
13274 """Build hooks nodes.
13277 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13279 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13280 nl.append(self.op.target_node)
13284 def CheckPrereq(self):
13285 """Check prerequisites.
13287 This checks that the instance and node names are valid.
13290 instance_name = self.op.instance_name
13292 self.instance = self.cfg.GetInstanceInfo(instance_name)
13293 assert self.instance is not None, \
13294 "Cannot retrieve locked instance %s" % self.op.instance_name
13295 _CheckNodeOnline(self, self.instance.primary_node)
13297 if (self.op.remove_instance and
13298 self.instance.admin_state == constants.ADMINST_UP and
13299 not self.op.shutdown):
13300 raise errors.OpPrereqError("Can not remove instance without shutting it"
13301 " down before")
13303 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13304 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13305 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13306 assert self.dst_node is not None
13308 _CheckNodeOnline(self, self.dst_node.name)
13309 _CheckNodeNotDrained(self, self.dst_node.name)
13312 self.dest_disk_info = None
13313 self.dest_x509_ca = None
13315 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13316 self.dst_node = None
13318 if len(self.op.target_node) != len(self.instance.disks):
13319 raise errors.OpPrereqError(("Received destination information for %s"
13320 " disks, but instance %s has %s disks") %
13321 (len(self.op.target_node), instance_name,
13322 len(self.instance.disks)),
13323 errors.ECODE_INVAL)
13325 cds = _GetClusterDomainSecret()
13327 # Check X509 key name
13329 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13330 except (TypeError, ValueError), err:
13331 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13333 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13334 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13335 errors.ECODE_INVAL)
13337 # Load and verify CA
13339 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13340 except OpenSSL.crypto.Error, err:
13341 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13342 (err, ), errors.ECODE_INVAL)
13344 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13345 if errcode is not None:
13346 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13347 (msg, ), errors.ECODE_INVAL)
13349 self.dest_x509_ca = cert
13351 # Verify target information
13353 for idx, disk_data in enumerate(self.op.target_node):
13355 (host, port, magic) = \
13356 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13357 except errors.GenericError, err:
13358 raise errors.OpPrereqError("Target info for disk %s: %s" %
13359 (idx, err), errors.ECODE_INVAL)
13361 disk_info.append((host, port, magic))
13363 assert len(disk_info) == len(self.op.target_node)
13364 self.dest_disk_info = disk_info
13366 else:
13367 raise errors.ProgrammerError("Unhandled export mode %r" %
13368 self.op.mode)
13370 # instance disk type verification
13371 # TODO: Implement export support for file-based disks
13372 for disk in self.instance.disks:
13373 if disk.dev_type == constants.LD_FILE:
13374 raise errors.OpPrereqError("Export not supported for instances with"
13375 " file-based disks", errors.ECODE_INVAL)
13377 def _CleanupExports(self, feedback_fn):
13378 """Removes exports of current instance from all other nodes.
13380 If an instance in a cluster with nodes A..D was exported to node C, its
13381 exports will be removed from the nodes A, B and D.
13384 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13386 nodelist = self.cfg.GetNodeList()
13387 nodelist.remove(self.dst_node.name)
13389 # on one-node clusters nodelist will be empty after the removal
13390 # if we proceed the backup would be removed because OpBackupQuery
13391 # substitutes an empty list with the full cluster node list.
13392 iname = self.instance.name
13393 if nodelist:
13394 feedback_fn("Removing old exports for instance %s" % iname)
13395 exportlist = self.rpc.call_export_list(nodelist)
13396 for node in exportlist:
13397 if exportlist[node].fail_msg:
13398 continue
13399 if iname in exportlist[node].payload:
13400 msg = self.rpc.call_export_remove(node, iname).fail_msg
13401 if msg:
13402 self.LogWarning("Could not remove older export for instance %s"
13403 " on node %s: %s", iname, node, msg)
13405 def Exec(self, feedback_fn):
13406 """Export an instance to an image in the cluster.
13409 assert self.op.mode in constants.EXPORT_MODES
13411 instance = self.instance
13412 src_node = instance.primary_node
13414 if self.op.shutdown:
13415 # shutdown the instance, but not the disks
13416 feedback_fn("Shutting down instance %s" % instance.name)
13417 result = self.rpc.call_instance_shutdown(src_node, instance,
13418 self.op.shutdown_timeout)
13419 # TODO: Maybe ignore failures if ignore_remove_failures is set
13420 result.Raise("Could not shutdown instance %s on"
13421 " node %s" % (instance.name, src_node))
13423 # set the disks ID correctly since call_instance_start needs the
13424 # correct drbd minor to create the symlinks
13425 for disk in instance.disks:
13426 self.cfg.SetDiskID(disk, src_node)
13428 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13430 if activate_disks:
13431 # Activate the instance disks if we're exporting a stopped instance
13432 feedback_fn("Activating disks for %s" % instance.name)
13433 _StartInstanceDisks(self, instance, None)
13436 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13439 helper.CreateSnapshots()
13441 if (self.op.shutdown and
13442 instance.admin_state == constants.ADMINST_UP and
13443 not self.op.remove_instance):
13444 assert not activate_disks
13445 feedback_fn("Starting instance %s" % instance.name)
13446 result = self.rpc.call_instance_start(src_node,
13447 (instance, None, None), False)
13448 msg = result.fail_msg
13449 if msg:
13450 feedback_fn("Failed to start instance: %s" % msg)
13451 _ShutdownInstanceDisks(self, instance)
13452 raise errors.OpExecError("Could not start instance: %s" % msg)
13454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13455 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13456 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13457 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13458 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13460 (key_name, _, _) = self.x509_key_name
13462 dest_ca_pem = \
13463 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13464 self.dest_x509_ca)
13466 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13467 key_name, dest_ca_pem,
13468 timeouts)
13472 # Check for backwards compatibility
13473 assert len(dresults) == len(instance.disks)
13474 assert compat.all(isinstance(i, bool) for i in dresults), \
13475 "Not all results are boolean: %r" % dresults
13478 if activate_disks:
13479 feedback_fn("Deactivating disks for %s" % instance.name)
13480 _ShutdownInstanceDisks(self, instance)
13482 if not (compat.all(dresults) and fin_resu):
13483 failures = []
13484 if not fin_resu:
13485 failures.append("export finalization")
13486 if not compat.all(dresults):
13487 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13488 if not dsk)
13489 failures.append("disk export: disk(s) %s" % fdsk)
13491 raise errors.OpExecError("Export failed, errors in %s" %
13492 utils.CommaJoin(failures))
13494 # At this point, the export was successful, we can cleanup/finish
13496 # Remove instance if requested
13497 if self.op.remove_instance:
13498 feedback_fn("Removing instance %s" % instance.name)
13499 _RemoveInstance(self, feedback_fn, instance,
13500 self.op.ignore_remove_failures)
13502 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13503 self._CleanupExports(feedback_fn)
13505 return fin_resu, dresults
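# Annotation on the return value above (illustrative, not original code):
# fin_resu reports the export finalization and dresults carries one boolean
# per instance disk, e.g.
#
#   (True, [True, True])     # finalized, both disk dumps succeeded
#
# a False anywhere would already have raised OpExecError further up.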
13508 class LUBackupRemove(NoHooksLU):
13509 """Remove exports related to the named instance.
13514 def ExpandNames(self):
13515 self.needed_locks = {}
13516 # We need all nodes to be locked in order for RemoveExport to work, but we
13517 # don't need to lock the instance itself, as nothing will happen to it (and
13518 # we can remove exports also for a removed instance)
13519 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13521 def Exec(self, feedback_fn):
13522 """Remove any export.
13525 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13526 # If the instance was not found we'll try with the name that was passed in.
13527 # This will only work if it was an FQDN, though.
13528 fqdn_warn = False
13529 if not instance_name:
13530 fqdn_warn = True
13531 instance_name = self.op.instance_name
13533 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13534 exportlist = self.rpc.call_export_list(locked_nodes)
13535 found = False
13536 for node in exportlist:
13537 msg = exportlist[node].fail_msg
13538 if msg:
13539 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13540 continue
13541 if instance_name in exportlist[node].payload:
13542 found = True
13543 result = self.rpc.call_export_remove(node, instance_name)
13544 msg = result.fail_msg
13545 if msg:
13546 logging.error("Could not remove export for instance %s"
13547 " on node %s: %s", instance_name, node, msg)
13549 if fqdn_warn and not found:
13550 feedback_fn("Export not found. If trying to remove an export belonging"
13551 " to a deleted instance please use its Fully Qualified"
13555 class LUGroupAdd(LogicalUnit):
13556 """Logical unit for creating node groups.
13559 HPATH = "group-add"
13560 HTYPE = constants.HTYPE_GROUP
13563 def ExpandNames(self):
13564 # We need the new group's UUID here so that we can create and acquire the
13565 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13566 # that it should not check whether the UUID exists in the configuration.
13567 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13568 self.needed_locks = {}
13569 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13571 def CheckPrereq(self):
13572 """Check prerequisites.
13574 This checks that the given group name is not an existing node group
13578 try:
13579 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13580 except errors.OpPrereqError:
13581 pass
13582 else:
13583 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13584 " node group (UUID: %s)" %
13585 (self.op.group_name, existing_uuid),
13586 errors.ECODE_EXISTS)
13588 if self.op.ndparams:
13589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13591 if self.op.hv_state:
13592 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13594 self.new_hv_state = None
13596 if self.op.disk_state:
13597 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13599 self.new_disk_state = None
13601 if self.op.diskparams:
13602 for templ in constants.DISK_TEMPLATES:
13603 if templ in self.op.diskparams:
13604 utils.ForceDictType(self.op.diskparams[templ],
13605 constants.DISK_DT_TYPES)
13606 self.new_diskparams = self.op.diskparams
13608 self.new_diskparams = {}
13610 if self.op.ipolicy:
13611 cluster = self.cfg.GetClusterInfo()
13612 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13614 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13615 except errors.ConfigurationError, err:
13616 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13617 errors.ECODE_INVAL)
13619 def BuildHooksEnv(self):
13620 """Build hooks env.
13624 "GROUP_NAME": self.op.group_name,
13627 def BuildHooksNodes(self):
13628 """Build hooks nodes.
13631 mn = self.cfg.GetMasterNode()
13632 return ([mn], [mn])
13634 def Exec(self, feedback_fn):
13635 """Add the node group to the cluster.
13638 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13639 uuid=self.group_uuid,
13640 alloc_policy=self.op.alloc_policy,
13641 ndparams=self.op.ndparams,
13642 diskparams=self.new_diskparams,
13643 ipolicy=self.op.ipolicy,
13644 hv_state_static=self.new_hv_state,
13645 disk_state_static=self.new_disk_state)
13647 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13648 del self.remove_locks[locking.LEVEL_NODEGROUP]
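# Usage sketch (hypothetical client-side example, not part of this module):
# the LU is normally reached through its opcode, e.g. from "gnt-group add":
#
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)
#   # submitted to the master daemon, which then runs LUGroupAdd for it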
13651 class LUGroupAssignNodes(NoHooksLU):
13652 """Logical unit for assigning nodes to groups.
13657 def ExpandNames(self):
13658 # These raise errors.OpPrereqError on their own:
13659 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13660 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13662 # We want to lock all the affected nodes and groups. We have readily
13663 # available the list of nodes, and the *destination* group. To gather the
13664 # list of "source" groups, we need to fetch node information later on.
13665 self.needed_locks = {
13666 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13667 locking.LEVEL_NODE: self.op.nodes,
13670 def DeclareLocks(self, level):
13671 if level == locking.LEVEL_NODEGROUP:
13672 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13674 # Try to get all affected nodes' groups without having the group or node
13675 # lock yet. Needs verification later in the code flow.
13676 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13678 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13680 def CheckPrereq(self):
13681 """Check prerequisites.
13684 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13685 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13686 frozenset(self.op.nodes))
13688 expected_locks = (set([self.group_uuid]) |
13689 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13690 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13691 if actual_locks != expected_locks:
13692 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13693 " current groups are '%s', used to be '%s'" %
13694 (utils.CommaJoin(expected_locks),
13695 utils.CommaJoin(actual_locks)))
13697 self.node_data = self.cfg.GetAllNodesInfo()
13698 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13699 instance_data = self.cfg.GetAllInstancesInfo()
13701 if self.group is None:
13702 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13703 (self.op.group_name, self.group_uuid))
13705 (new_splits, previous_splits) = \
13706 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13707 for node in self.op.nodes],
13708 self.node_data, instance_data)
13710 if new_splits:
13711 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13713 if not self.op.force:
13714 raise errors.OpExecError("The following instances get split by this"
13715 " change and --force was not given: %s" %
13716 fmt_new_splits)
13717 else:
13718 self.LogWarning("This operation will split the following instances: %s",
13719 fmt_new_splits)
13721 if previous_splits:
13722 self.LogWarning("In addition, these already-split instances continue"
13723 " to be split across groups: %s",
13724 utils.CommaJoin(utils.NiceSort(previous_splits)))
13726 def Exec(self, feedback_fn):
13727 """Assign nodes to a new group.
13730 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13732 self.cfg.AssignGroupNodes(mods)
13734 @staticmethod
13735 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13736 """Check for split instances after a node assignment.
13738 This method considers a series of node assignments as an atomic operation,
13739 and returns information about split instances after applying the set of
13742 In particular, it returns information about newly split instances, and
13743 instances that were already split, and remain so after the change.
13745 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13746 considered.
13748 @type changes: list of (node_name, new_group_uuid) pairs.
13749 @param changes: list of node assignments to consider.
13750 @param node_data: a dict with data for all nodes
13751 @param instance_data: a dict with all instances to consider
13752 @rtype: a two-tuple
13753 @return: a list of instances that were previously okay and result split as a
13754 consequence of this change, and a list of instances that were previously
13755 split and this change does not fix.
13758 changed_nodes = dict((node, group) for node, group in changes
13759 if node_data[node].group != group)
13761 all_split_instances = set()
13762 previously_split_instances = set()
13764 def InstanceNodes(instance):
13765 return [instance.primary_node] + list(instance.secondary_nodes)
13767 for inst in instance_data.values():
13768 if inst.disk_template not in constants.DTS_INT_MIRROR:
13771 instance_nodes = InstanceNodes(inst)
13773 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13774 previously_split_instances.add(inst.name)
13776 if len(set(changed_nodes.get(node, node_data[node].group)
13777 for node in instance_nodes)) > 1:
13778 all_split_instances.add(inst.name)
13780 return (list(all_split_instances - previously_split_instances),
13781 list(previously_split_instances & all_split_instances))
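# Worked example (annotation, not original code): a DRBD instance with its
# primary on node1 and its secondary on node2, both currently in group "g1",
# while node2 is being reassigned:
#
#   changes = [("node2", "g2")]
#   # node_data["node1"].group == node_data["node2"].group == "g1"
#   # -> the instance now spans g1/g2 and shows up in the first returned list
#   #    (newly split); instances that already spanned several groups before
#   #    the change end up in the second list instead.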
13784 class _GroupQuery(_QueryBase):
13785 FIELDS = query.GROUP_FIELDS
13787 def ExpandNames(self, lu):
13788 lu.needed_locks = {}
13790 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13791 self._cluster = lu.cfg.GetClusterInfo()
13792 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13794 if not self.names:
13795 self.wanted = [name_to_uuid[name]
13796 for name in utils.NiceSort(name_to_uuid.keys())]
13797 else:
13798 # Accept names to be either names or UUIDs.
13799 missing = []
13800 self.wanted = []
13801 all_uuid = frozenset(self._all_groups.keys())
13803 for name in self.names:
13804 if name in all_uuid:
13805 self.wanted.append(name)
13806 elif name in name_to_uuid:
13807 self.wanted.append(name_to_uuid[name])
13808 else:
13809 missing.append(name)
13811 if missing:
13812 raise errors.OpPrereqError("Some groups do not exist: %s" %
13813 utils.CommaJoin(missing),
13814 errors.ECODE_NOENT)
13816 def DeclareLocks(self, lu, level):
13819 def _GetQueryData(self, lu):
13820 """Computes the list of node groups and their attributes.
13823 do_nodes = query.GQ_NODE in self.requested_data
13824 do_instances = query.GQ_INST in self.requested_data
13826 group_to_nodes = None
13827 group_to_instances = None
13829 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13830 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13831 # latter GetAllInstancesInfo() is not enough, for we have to go through
13832 # instance->node. Hence, we will need to process nodes even if we only need
13833 # instance information.
13834 if do_nodes or do_instances:
13835 all_nodes = lu.cfg.GetAllNodesInfo()
13836 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13837 node_to_group = {}
13839 for node in all_nodes.values():
13840 if node.group in group_to_nodes:
13841 group_to_nodes[node.group].append(node.name)
13842 node_to_group[node.name] = node.group
13844 if do_instances:
13845 all_instances = lu.cfg.GetAllInstancesInfo()
13846 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13848 for instance in all_instances.values():
13849 node = instance.primary_node
13850 if node in node_to_group:
13851 group_to_instances[node_to_group[node]].append(instance.name)
13853 if not do_nodes:
13854 # Do not pass on node information if it was not requested.
13855 group_to_nodes = None
13857 return query.GroupQueryData(self._cluster,
13858 [self._all_groups[uuid]
13859 for uuid in self.wanted],
13860 group_to_nodes, group_to_instances,
13861 query.GQ_DISKPARAMS in self.requested_data)
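# Annotation (illustrative shape of the mappings built above):
#
#   group_to_nodes     = {"g1-uuid": ["node1", "node2"], "g2-uuid": []}
#   group_to_instances = {"g1-uuid": ["inst1.example.com"], "g2-uuid": []}
#
# either mapping stays None when the corresponding GQ_NODE/GQ_INST data was
# not requested.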
13864 class LUGroupQuery(NoHooksLU):
13865 """Logical unit for querying node groups.
13870 def CheckArguments(self):
13871 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13872 self.op.output_fields, False)
13874 def ExpandNames(self):
13875 self.gq.ExpandNames(self)
13877 def DeclareLocks(self, level):
13878 self.gq.DeclareLocks(self, level)
13880 def Exec(self, feedback_fn):
13881 return self.gq.OldStyleQuery(self)
13884 class LUGroupSetParams(LogicalUnit):
13885 """Modifies the parameters of a node group.
13888 HPATH = "group-modify"
13889 HTYPE = constants.HTYPE_GROUP
13892 def CheckArguments(self):
13893 all_changes = [
13894 self.op.ndparams,
13895 self.op.diskparams,
13896 self.op.alloc_policy,
13897 self.op.hv_state,
13898 self.op.disk_state,
13899 self.op.ipolicy,
13900 ]
13902 if all_changes.count(None) == len(all_changes):
13903 raise errors.OpPrereqError("Please pass at least one modification",
13904 errors.ECODE_INVAL)
13906 def ExpandNames(self):
13907 # This raises errors.OpPrereqError on its own:
13908 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13910 self.needed_locks = {
13911 locking.LEVEL_INSTANCE: [],
13912 locking.LEVEL_NODEGROUP: [self.group_uuid],
13915 self.share_locks[locking.LEVEL_INSTANCE] = 1
13917 def DeclareLocks(self, level):
13918 if level == locking.LEVEL_INSTANCE:
13919 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13921 # Lock instances optimistically, needs verification once group lock has
13923 self.needed_locks[locking.LEVEL_INSTANCE] = \
13924 self.cfg.GetNodeGroupInstances(self.group_uuid)
13926 @staticmethod
13927 def _UpdateAndVerifyDiskParams(old, new):
13928 """Updates and verifies disk parameters.
13931 new_params = _GetUpdatedParams(old, new)
13932 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13933 return new_params
13935 def CheckPrereq(self):
13936 """Check prerequisites.
13939 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13941 # Check if locked instances are still correct
13942 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13944 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13945 cluster = self.cfg.GetClusterInfo()
13947 if self.group is None:
13948 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13949 (self.op.group_name, self.group_uuid))
13951 if self.op.ndparams:
13952 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13953 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13954 self.new_ndparams = new_ndparams
13956 if self.op.diskparams:
13957 diskparams = self.group.diskparams
13958 uavdp = self._UpdateAndVerifyDiskParams
13959 # For each disktemplate subdict update and verify the values
13960 new_diskparams = dict((dt,
13961 uavdp(diskparams.get(dt, {}),
13962 self.op.diskparams[dt]))
13963 for dt in constants.DISK_TEMPLATES
13964 if dt in self.op.diskparams)
13965 # As we've all subdicts of diskparams ready, lets merge the actual
13966 # dict with all updated subdicts
13967 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13969 if self.op.hv_state:
13970 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13971 self.group.hv_state_static)
13973 if self.op.disk_state:
13974 self.new_disk_state = \
13975 _MergeAndVerifyDiskState(self.op.disk_state,
13976 self.group.disk_state_static)
13978 if self.op.ipolicy:
13979 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13983 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13984 inst_filter = lambda inst: inst.name in owned_instances
13985 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13986 violations = \
13987 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13988 self.group),
13989 new_ipolicy, instances)
13991 if violations:
13992 self.LogWarning("After the ipolicy change the following instances"
13993 " violate them: %s",
13994 utils.CommaJoin(violations))
13996 def BuildHooksEnv(self):
13997 """Build hooks env.
14001 "GROUP_NAME": self.op.group_name,
14002 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14005 def BuildHooksNodes(self):
14006 """Build hooks nodes.
14009 mn = self.cfg.GetMasterNode()
14010 return ([mn], [mn])
14012 def Exec(self, feedback_fn):
14013 """Modifies the node group.
14017 result = []
14018 if self.op.ndparams:
14019 self.group.ndparams = self.new_ndparams
14020 result.append(("ndparams", str(self.group.ndparams)))
14022 if self.op.diskparams:
14023 self.group.diskparams = self.new_diskparams
14024 result.append(("diskparams", str(self.group.diskparams)))
14026 if self.op.alloc_policy:
14027 self.group.alloc_policy = self.op.alloc_policy
14029 if self.op.hv_state:
14030 self.group.hv_state_static = self.new_hv_state
14032 if self.op.disk_state:
14033 self.group.disk_state_static = self.new_disk_state
14035 if self.op.ipolicy:
14036 self.group.ipolicy = self.new_ipolicy
14038 self.cfg.Update(self.group, feedback_fn)
14040 return result
14042 class LUGroupRemove(LogicalUnit):
14043 HPATH = "group-remove"
14044 HTYPE = constants.HTYPE_GROUP
14047 def ExpandNames(self):
14048 # This raises errors.OpPrereqError on its own:
14049 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14050 self.needed_locks = {
14051 locking.LEVEL_NODEGROUP: [self.group_uuid],
14054 def CheckPrereq(self):
14055 """Check prerequisites.
14057 This checks that the given group name exists as a node group, that it is
14058 empty (i.e., contains no nodes), and that it is not the last group of the
14059 cluster.
14062 # Verify that the group is empty.
14063 group_nodes = [node.name
14064 for node in self.cfg.GetAllNodesInfo().values()
14065 if node.group == self.group_uuid]
14067 if group_nodes:
14068 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14069 " nodes: %s" %
14070 (self.op.group_name,
14071 utils.CommaJoin(utils.NiceSort(group_nodes))),
14072 errors.ECODE_STATE)
14074 # Verify the cluster would not be left group-less.
14075 if len(self.cfg.GetNodeGroupList()) == 1:
14076 raise errors.OpPrereqError("Group '%s' is the only group,"
14077 " cannot be removed" %
14078 self.op.group_name,
14079 errors.ECODE_STATE)
14081 def BuildHooksEnv(self):
14082 """Build hooks env.
14086 "GROUP_NAME": self.op.group_name,
14089 def BuildHooksNodes(self):
14090 """Build hooks nodes.
14093 mn = self.cfg.GetMasterNode()
14094 return ([mn], [mn])
14096 def Exec(self, feedback_fn):
14097 """Remove the node group.
14101 self.cfg.RemoveNodeGroup(self.group_uuid)
14102 except errors.ConfigurationError:
14103 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14104 (self.op.group_name, self.group_uuid))
14106 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14109 class LUGroupRename(LogicalUnit):
14110 HPATH = "group-rename"
14111 HTYPE = constants.HTYPE_GROUP
14114 def ExpandNames(self):
14115 # This raises errors.OpPrereqError on its own:
14116 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14118 self.needed_locks = {
14119 locking.LEVEL_NODEGROUP: [self.group_uuid],
14122 def CheckPrereq(self):
14123 """Check prerequisites.
14125 Ensures requested new name is not yet used.
14128 try:
14129 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14130 except errors.OpPrereqError:
14131 pass
14132 else:
14133 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14134 " node group (UUID: %s)" %
14135 (self.op.new_name, new_name_uuid),
14136 errors.ECODE_EXISTS)
14138 def BuildHooksEnv(self):
14139 """Build hooks env.
14143 "OLD_NAME": self.op.group_name,
14144 "NEW_NAME": self.op.new_name,
14147 def BuildHooksNodes(self):
14148 """Build hooks nodes.
14151 mn = self.cfg.GetMasterNode()
14153 all_nodes = self.cfg.GetAllNodesInfo()
14154 all_nodes.pop(mn, None)
14156 run_nodes = [mn]
14157 run_nodes.extend(node.name for node in all_nodes.values()
14158 if node.group == self.group_uuid)
14160 return (run_nodes, run_nodes)
14162 def Exec(self, feedback_fn):
14163 """Rename the node group.
14166 group = self.cfg.GetNodeGroup(self.group_uuid)
14168 if group is None:
14169 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14170 (self.op.group_name, self.group_uuid))
14172 group.name = self.op.new_name
14173 self.cfg.Update(group, feedback_fn)
14175 return self.op.new_name
14178 class LUGroupEvacuate(LogicalUnit):
14179 HPATH = "group-evacuate"
14180 HTYPE = constants.HTYPE_GROUP
14183 def ExpandNames(self):
14184 # This raises errors.OpPrereqError on its own:
14185 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14187 if self.op.target_groups:
14188 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14189 self.op.target_groups)
14190 else:
14191 self.req_target_uuids = []
14193 if self.group_uuid in self.req_target_uuids:
14194 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14195 " as a target group (targets are %s)" %
14196 (self.group_uuid,
14197 utils.CommaJoin(self.req_target_uuids)),
14198 errors.ECODE_INVAL)
14200 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14202 self.share_locks = _ShareAll()
14203 self.needed_locks = {
14204 locking.LEVEL_INSTANCE: [],
14205 locking.LEVEL_NODEGROUP: [],
14206 locking.LEVEL_NODE: [],
14209 def DeclareLocks(self, level):
14210 if level == locking.LEVEL_INSTANCE:
14211 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14213 # Lock instances optimistically, needs verification once node and group
14214 # locks have been acquired
14215 self.needed_locks[locking.LEVEL_INSTANCE] = \
14216 self.cfg.GetNodeGroupInstances(self.group_uuid)
14218 elif level == locking.LEVEL_NODEGROUP:
14219 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14221 if self.req_target_uuids:
14222 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14224 # Lock all groups used by instances optimistically; this requires going
14225 # via the node before it's locked, requiring verification later on
14226 lock_groups.update(group_uuid
14227 for instance_name in
14228 self.owned_locks(locking.LEVEL_INSTANCE)
14229 for group_uuid in
14230 self.cfg.GetInstanceNodeGroups(instance_name))
14231 else:
14232 # No target groups, need to lock all of them
14233 lock_groups = locking.ALL_SET
14235 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14237 elif level == locking.LEVEL_NODE:
14238 # This will only lock the nodes in the group to be evacuated which
14239 # contain actual instances
14240 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14241 self._LockInstancesNodes()
14243 # Lock all nodes in group to be evacuated and target groups
14244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14245 assert self.group_uuid in owned_groups
14246 member_nodes = [node_name
14247 for group in owned_groups
14248 for node_name in self.cfg.GetNodeGroup(group).members]
14249 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14251 def CheckPrereq(self):
14252 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14253 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14254 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14256 assert owned_groups.issuperset(self.req_target_uuids)
14257 assert self.group_uuid in owned_groups
14259 # Check if locked instances are still correct
14260 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14262 # Get instance information
14263 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14265 # Check if node groups for locked instances are still correct
14266 _CheckInstancesNodeGroups(self.cfg, self.instances,
14267 owned_groups, owned_nodes, self.group_uuid)
14269 if self.req_target_uuids:
14270 # User requested specific target groups
14271 self.target_uuids = self.req_target_uuids
14273 # All groups except the one to be evacuated are potential targets
14274 self.target_uuids = [group_uuid for group_uuid in owned_groups
14275 if group_uuid != self.group_uuid]
14277 if not self.target_uuids:
14278 raise errors.OpPrereqError("There are no possible target groups",
14279 errors.ECODE_INVAL)
14281 def BuildHooksEnv(self):
14282 """Build hooks env.
14286 "GROUP_NAME": self.op.group_name,
14287 "TARGET_GROUPS": " ".join(self.target_uuids),
14290 def BuildHooksNodes(self):
14291 """Build hooks nodes.
14294 mn = self.cfg.GetMasterNode()
14296 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14298 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14300 return (run_nodes, run_nodes)
14302 def Exec(self, feedback_fn):
14303 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14305 assert self.group_uuid not in self.target_uuids
14307 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14308 instances=instances, target_groups=self.target_uuids)
14310 ial.Run(self.op.iallocator)
14312 if not ial.success:
14313 raise errors.OpPrereqError("Can't compute group evacuation using"
14314 " iallocator '%s': %s" %
14315 (self.op.iallocator, ial.info),
14316 errors.ECODE_NORES)
14318 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14320 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14321 len(jobs), self.op.group_name)
14323 return ResultWithJobs(jobs)
14326 class TagsLU(NoHooksLU): # pylint: disable=W0223
14327 """Generic tags LU.
14329 This is an abstract class which is the parent of all the other tags LUs.
14332 def ExpandNames(self):
14333 self.group_uuid = None
14334 self.needed_locks = {}
14336 if self.op.kind == constants.TAG_NODE:
14337 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14338 lock_level = locking.LEVEL_NODE
14339 lock_name = self.op.name
14340 elif self.op.kind == constants.TAG_INSTANCE:
14341 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14342 lock_level = locking.LEVEL_INSTANCE
14343 lock_name = self.op.name
14344 elif self.op.kind == constants.TAG_NODEGROUP:
14345 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14346 lock_level = locking.LEVEL_NODEGROUP
14347 lock_name = self.group_uuid
14348 else:
14349 lock_level = None
14350 lock_name = None
14352 if lock_level and getattr(self.op, "use_locking", True):
14353 self.needed_locks[lock_level] = lock_name
14355 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14356 # not possible to acquire the BGL based on opcode parameters)
14358 def CheckPrereq(self):
14359 """Check prerequisites.
14362 if self.op.kind == constants.TAG_CLUSTER:
14363 self.target = self.cfg.GetClusterInfo()
14364 elif self.op.kind == constants.TAG_NODE:
14365 self.target = self.cfg.GetNodeInfo(self.op.name)
14366 elif self.op.kind == constants.TAG_INSTANCE:
14367 self.target = self.cfg.GetInstanceInfo(self.op.name)
14368 elif self.op.kind == constants.TAG_NODEGROUP:
14369 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14371 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14372 str(self.op.kind), errors.ECODE_INVAL)
14375 class LUTagsGet(TagsLU):
14376 """Returns the tags of a given object.
14381 def ExpandNames(self):
14382 TagsLU.ExpandNames(self)
14384 # Share locks as this is only a read operation
14385 self.share_locks = _ShareAll()
14387 def Exec(self, feedback_fn):
14388 """Returns the tag list.
14391 return list(self.target.GetTags())
14394 class LUTagsSearch(NoHooksLU):
14395 """Searches the tags for a given pattern.
14400 def ExpandNames(self):
14401 self.needed_locks = {}
14403 def CheckPrereq(self):
14404 """Check prerequisites.
14406 This checks the pattern passed for validity by compiling it.
14409 try:
14410 self.re = re.compile(self.op.pattern)
14411 except re.error, err:
14412 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14413 (self.op.pattern, err), errors.ECODE_INVAL)
14415 def Exec(self, feedback_fn):
14416 """Returns the tag list.
14419 cfg = self.cfg
14420 tgts = [("/cluster", cfg.GetClusterInfo())]
14421 ilist = cfg.GetAllInstancesInfo().values()
14422 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14423 nlist = cfg.GetAllNodesInfo().values()
14424 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14425 tgts.extend(("/nodegroup/%s" % n.name, n)
14426 for n in cfg.GetAllNodeGroupsInfo().values())
14427 results = []
14428 for path, target in tgts:
14429 for tag in target.GetTags():
14430 if self.re.search(tag):
14431 results.append((path, tag))
14433 return results
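# Annotation (illustrative result format): for the pattern "^db" the search
# above could return, for example,
#
#   [("/cluster", "dbfarm"), ("/instances/inst1.example.com", "dbserver")]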
14435 class LUTagsSet(TagsLU):
14436 """Sets a tag on a given object.
14441 def CheckPrereq(self):
14442 """Check prerequisites.
14444 This checks the type and length of the tag name and value.
14447 TagsLU.CheckPrereq(self)
14448 for tag in self.op.tags:
14449 objects.TaggableObject.ValidateTag(tag)
14451 def Exec(self, feedback_fn):
14455 try:
14456 for tag in self.op.tags:
14457 self.target.AddTag(tag)
14458 except errors.TagError, err:
14459 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14460 self.cfg.Update(self.target, feedback_fn)
14463 class LUTagsDel(TagsLU):
14464 """Delete a list of tags from a given object.
14469 def CheckPrereq(self):
14470 """Check prerequisites.
14472 This checks that we have the given tag.
14475 TagsLU.CheckPrereq(self)
14476 for tag in self.op.tags:
14477 objects.TaggableObject.ValidateTag(tag)
14478 del_tags = frozenset(self.op.tags)
14479 cur_tags = self.target.GetTags()
14481 diff_tags = del_tags - cur_tags
14482 if diff_tags:
14483 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14484 raise errors.OpPrereqError("Tag(s) %s not found" %
14485 (utils.CommaJoin(diff_names), ),
14486 errors.ECODE_NOENT)
14488 def Exec(self, feedback_fn):
14489 """Remove the tag from the object.
14492 for tag in self.op.tags:
14493 self.target.RemoveTag(tag)
14494 self.cfg.Update(self.target, feedback_fn)
14497 class LUTestDelay(NoHooksLU):
14498 """Sleep for a specified amount of time.
14500 This LU sleeps on the master and/or nodes for a specified amount of
14506 def ExpandNames(self):
14507 """Expand names and set required locks.
14509 This expands the node list, if any.
14512 self.needed_locks = {}
14513 if self.op.on_nodes:
14514 # _GetWantedNodes can be used here, but is not always appropriate to use
14515 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14516 # more information.
14517 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14518 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14520 def _TestDelay(self):
14521 """Do the actual sleep.
14524 if self.op.on_master:
14525 if not utils.TestDelay(self.op.duration):
14526 raise errors.OpExecError("Error during master delay test")
14527 if self.op.on_nodes:
14528 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14529 for node, node_result in result.items():
14530 node_result.Raise("Failure during rpc call to node %s" % node)
14532 def Exec(self, feedback_fn):
14533 """Execute the test delay opcode, with the wanted repetitions.
14536 if self.op.repeat == 0:
14537 self._TestDelay()
14538 else:
14539 top_value = self.op.repeat - 1
14540 for i in range(self.op.repeat):
14541 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14542 self._TestDelay()
14545 class LUTestJqueue(NoHooksLU):
14546 """Utility LU to test some aspects of the job queue.
14551 # Must be lower than default timeout for WaitForJobChange to see whether it
14552 # notices changed jobs
14553 _CLIENT_CONNECT_TIMEOUT = 20.0
14554 _CLIENT_CONFIRM_TIMEOUT = 60.0
14557 def _NotifyUsingSocket(cls, cb, errcls):
14558 """Opens a Unix socket and waits for another program to connect.
14561 @param cb: Callback to send socket name to client
14562 @type errcls: class
14563 @param errcls: Exception class to use for errors
14566 # Using a temporary directory as there's no easy way to create temporary
14567 # sockets without writing a custom loop around tempfile.mktemp and
14569 tmpdir = tempfile.mkdtemp()
14571 tmpsock = utils.PathJoin(tmpdir, "sock")
14573 logging.debug("Creating temporary socket at %s", tmpsock)
14574 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14579 # Send details to client
14582 # Wait for client to connect before continuing
14583 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14584 try:
14585 (conn, _) = sock.accept()
14586 except socket.error, err:
14587 raise errcls("Client didn't connect in time (%s)" % err)
14591 # Remove as soon as client is connected
14592 shutil.rmtree(tmpdir)
14594 # Wait for client to close
14597 # pylint: disable=E1101
14598 # Instance of '_socketobject' has no ... member
14599 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14600 conn.recv(1)
14601 except socket.error, err:
14602 raise errcls("Client failed to confirm notification (%s)" % err)
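# Minimal sketch of the client side of this handshake (an assumption for
# illustration, not original code): the test client gets the socket path via
# the callback, connects, and keeps the connection open until it is ready to
# confirm:
#
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)     # unblocks sock.accept() above
#   ...                          # wait until the notification is processed
#   client.close()               # ends the confirmation wait on the LU side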
14606 def _SendNotification(self, test, arg, sockname):
14607 """Sends a notification to the client.
14610 @param test: Test name
14611 @param arg: Test argument (depends on test)
14612 @type sockname: string
14613 @param sockname: Socket path
14616 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14618 def _Notify(self, prereq, test, arg):
14619 """Notifies the client of a test.
14622 @param prereq: Whether this is a prereq-phase test
14624 @param test: Test name
14625 @param arg: Test argument (depends on test)
14628 if prereq:
14629 errcls = errors.OpPrereqError
14630 else:
14631 errcls = errors.OpExecError
14633 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14637 def CheckArguments(self):
14638 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14639 self.expandnames_calls = 0
14641 def ExpandNames(self):
14642 checkargs_calls = getattr(self, "checkargs_calls", 0)
14643 if checkargs_calls < 1:
14644 raise errors.ProgrammerError("CheckArguments was not called")
14646 self.expandnames_calls += 1
14648 if self.op.notify_waitlock:
14649 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14651 self.LogInfo("Expanding names")
14653 # Get lock on master node (just to get a lock, not for a particular reason)
14654 self.needed_locks = {
14655 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the per-mode key list in _MODE_DATA
      are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
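
  # For illustration only (all values below are made up), one entry of the
  # "instances" map built above might look like:
  #
  #   "web1.example.com": {
  #     "admin_state": "up",
  #     "vcpus": 2,
  #     "memory": 1024,
  #     "nics": [{"mac": "aa:00:00:12:34:56", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     ...
  #   }
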
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
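
  # For illustration only (an abridged, hypothetical example): after
  # _BuildInputData the serialized input handed to the iallocator script has
  # roughly this shape:
  #
  #   {
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #     "request": {"type": "allocate", "name": "new.example.com", ...}
  #   }
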
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
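
  # For illustration only (a hypothetical, minimal script reply): a valid
  # allocation answer parsed by _ValidateResult could look like:
  #
  #   {"success": true,
  #    "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
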
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)
    return sorted(result)
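
  # A small, hypothetical usage example of the helper above:
  #
  #   _NodesToGroups({"n1": "uuid-a", "n2": "uuid-b"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["n1", "n2", "unknown"])
  #   -> ["default", "uuid-b"]   # unknown node ignored, missing group -> UUID

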
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
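
# A small, hypothetical usage example of the lookup helper above:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")         # raises OpPrereqError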