4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
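# Usage sketch (illustrative only, not part of this module): an LU's Exec can
# hand follow-up work back to the job queue roughly like this, assuming
# OpTestDelay is an available opcode:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)]]  # one job with one opcode
#     return ResultWithJobs(jobs, result="initial work done")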
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
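# Acquire all node locks in shared mode (sketch: shared acquisition is
# requested via self.share_locks, not via needed_locks itself)
self.share_locks[locking.LEVEL_NODE] = 1
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}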
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes, an empty
318 list (and not None) should be returned.
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged, but any LU can override it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instances' nodes, or
381 to lock only primary or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
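# Typical wiring of the two helpers above (a sketch of the common pattern, not
# tied to any specific LU): an instance LU expands and locks the instance in
# ExpandNames, then computes its node locks in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()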
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU;
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
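# Example call (sketch): LUs typically annotate an instance's top-level disks
# before passing them to node RPCs, e.g.:
#
#   anno_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)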
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
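# Merge semantics sketch (hypothetical keys and values): with use_default=True,
# VALUE_DEFAULT entries remove the key so the next-level default applies again,
# while other entries overwrite or add:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
#   => {"a": 1, "c": 3}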
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
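# Usage sketch: once an LU has narrowed down which node(s) it actually operates
# on, it can drop the node locks it no longer needs, e.g. (self.op.node_name is
# just an illustrative opcode attribute here):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])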
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None if the value is within the range, or an error message otherwise
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
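# Behaviour sketch (hypothetical policy values): with a policy whose
# memory-size range is [128, 4096],
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
# returns None, while a value of 8192 yields a "not in range [128, 4096]"
# message naming the offending parameter.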
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @type instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violate the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
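# Example (sketch, hypothetical names): both wrappers resolve a short name to
# the fully qualified one stored in the configuration, e.g.
#   _ExpandNodeName(self.cfg, "node1")  =>  "node1.example.com"
# and raise errors.OpPrereqError if the item is not known.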
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
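# Typical use (sketch): an instance LU's BuildHooksEnv usually starts from the
# object-based helper and then adds LU-specific keys, roughly:
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     env["NEW_SECONDARY"] = self.op.remote_node  # illustrative extra key
#     return env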
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max by one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
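# Combined usage sketch (group_info and instance being already looked-up
# objects): policy checks elsewhere in this module typically chain the helpers
# like this:
#
#   ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), group_info)
#   res = _ComputeIPolicyInstanceViolation(ipolicy, instance)
#   if res:
#     raise errors.OpPrereqError("Instance violates group policy: %s" %
#                                utils.CommaJoin(res), errors.ECODE_INVAL)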
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: iterable of L{objects.Instance}
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
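# Example (sketch): variant names are appended to the OS name, e.g. for an OS
# advertising supported_variants ["default", "minimal"], a name such as
# "debootstrap+minimal" passes, while plain "debootstrap" raises
# "OS name must include a variant".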
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @type instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values into a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
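# Shape of the result (sketch, hypothetical names): a flat list of triples,
# e.g.
#   [("cluster", "kvm", {...}),
#    ("os debian-image", "kvm", {...}),
#    ("instance web1.example.com", "kvm", {...})]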
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
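# Usage sketch: the verify LUs below typically alias this method locally,
#   _ErrorIf = self._ErrorIf  # pylint: disable=C0103
# and then report findings, e.g.
#   _ErrorIf(bool(problems), constants.CV_ECLUSTERCFG, None,
#            "configuration problems found: %s", utils.CommaJoin(problems))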
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the overall RPC verify call failed (not whether
2059 the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this node is unknown to the configuration (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
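Note (descriptive): the rpc_fail/lvm_fail/hyp_fail/os_fail flags start out
False in __init__ below and are only switched to True by the group
verification code when the corresponding RPC data is missing or invalid.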
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 # extra call to their secondaries. We ensure here those nodes will
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194 reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = constants.NV_USERSCRIPTS not in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
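Illustrative example (assumed numbers): if this node is secondary for two
auto-balanced instances whose primary is node A and whose minimum memory is
2048 and 4096 MiB, then this node must report at least 6144 MiB of free
memory, otherwise a CV_ENODEN1 error is raised for it.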
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all the instances it would
2497 # have to take over, should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506 # information from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518 "not enough memory to accomodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
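# (explanatory, hypothetical file name) the resulting message reads e.g.:
# "File /etc/ganeti/foo found with 2 different checksums (variant 1 on
# nodeA; variant 2 on nodeB, nodeC)"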
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629 """Verifies and the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
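(judging from its use below, drbd_map maps node names to dictionaries of
{minor-number: instance-name})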
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645 test = (helper_result is None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
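# (explanatory) e.g. beautify_params([("size", "10G")]) == ["size: 10G"]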
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832 list. Otherwise, it marks the RPC call as failed for the instance
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, C{NodeImage})
2896 @param node_image: NodeImage objects for the nodes
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899 @rtype: {instance: {node: [(success, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
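Illustrative example (hypothetical names): a returned value could look like
{"inst1": {"nodeA": [(True, <status0>), (True, <status1>)],
"nodeB": [(False, "node offline")]}}, with one (success, payload) pair per
disk and per node the instance uses.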
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927 # _AnnotateDiskParams makes already copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006 We will make nodes contact all nodes in their group, and one node from
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
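Illustrative example (hypothetical groups): if this group is {a1, a2} and
there is one other group {b1, b2}, then a1 and a2 each check the nodes of
their own group plus one node drawn round-robin from {b1, b2}.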
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
3021 def BuildHooksEnv(self):
3024 Cluster-Verify hooks run only in the post phase; when they fail, their
3025 output is logged in the verify output and the verification fails.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044 """Verify integrity of the node group, performing various test on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182 # time before and after executing the request, we can at least have a time
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included,
3218 # excluding the master node (which we already have)
3219 for node in absent_nodes:
3220 nodeinfo = self.all_node_info[node]
3221 if (nodeinfo.vm_capable and not nodeinfo.offline and
3222 node != master_node):
3223 additional_nodes.append(node)
3224 vf_node_info.append(self.all_node_info[node])
3226 key = constants.NV_FILELIST
3227 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3228 {key: node_verify_param[key]},
3229 self.cfg.GetClusterName()))
3231 vf_nvinfo = all_nvinfo
3232 vf_node_info = self.my_node_info.values()
3234 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3236 feedback_fn("* Verifying node status")
3240 for node_i in node_data_list:
3242 nimg = node_image[node]
3246 feedback_fn("* Skipping offline node %s" % (node,))
3250 if node == master_node:
3252 elif node_i.master_candidate:
3253 ntype = "master candidate"
3254 elif node_i.drained:
3260 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3262 msg = all_nvinfo[node].fail_msg
3263 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3266 nimg.rpc_fail = True
3269 nresult = all_nvinfo[node].payload
3271 nimg.call_ok = self._VerifyNode(node_i, nresult)
3272 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3273 self._VerifyNodeNetwork(node_i, nresult)
3274 self._VerifyNodeUserScripts(node_i, nresult)
3275 self._VerifyOob(node_i, nresult)
3278 self._VerifyNodeLVM(node_i, nresult, vg_name)
3279 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3282 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeInstances(node_i, nresult, nimg)
3284 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3285 self._UpdateNodeOS(node_i, nresult, nimg)
3287 if not nimg.os_fail:
3288 if refos_img is None:
3290 self._VerifyNodeOS(node_i, nimg, refos_img)
3291 self._VerifyNodeBridges(node_i, nresult, bridges)
3293 # Check whether all running instances are primary for the node. (This
3294 # can no longer be done from _VerifyInstance below, since some of the
3295 # wrong instances could be from other node groups.)
3296 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3298 for inst in non_primary_inst:
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332 # templates with more than one secondary so that situation is not well
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyzes the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437 @param feedback_fn: function used to send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460 # No need to investigate payload if node is offline or gave
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476 """Verifies the cluster disks status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534 # This will only lock the nodes in the group to be verified which contain
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566 which need activate-disks, dict of instance: (node, volume) for
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
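# (explanatory) the result is roughly of the form ({node: error-message},
# [instances needing activate-disks], {instance: [[node, volume], ...]})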
3604 return (res_nodes, list(res_instances), res_missing)
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608 """Verifies the cluster disks sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652 children have smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
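Illustrative example (assumed sizes): for a DRBD8 disk recorded with size
10240 MiB whose data child is recorded with 10112 MiB, the child's size is
bumped to 10240 MiB in the configuration and True is returned so the caller
knows to write the change back.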
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
if size is None:
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
continue
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
continue
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
disk.size = size
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
new_ip = self.ip
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3786 result.Raise("Could not disable the master role")
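# The master IP has to stay down while the new name/IP is written to the
# configuration; it is re-activated with the new address further below.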
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3798 node_list.remove(master_params.name)
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3806 msg = result.fail_msg
if msg:
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
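# Disk template parameters are type-checked per template and then verified
# against the known per-template defaults.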
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3864 except errors.OpPrereqError, err:
3865 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3868 def ExpandNames(self):
3869 # FIXME: in the future maybe other cluster params won't require checking on
3870 # all nodes to be modified.
3871 self.needed_locks = {
3872 locking.LEVEL_NODE: locking.ALL_SET,
3873 locking.LEVEL_INSTANCE: locking.ALL_SET,
3874 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3876 self.share_locks = {
3877 locking.LEVEL_NODE: 1,
3878 locking.LEVEL_INSTANCE: 1,
3879 locking.LEVEL_NODEGROUP: 1,
3882 def BuildHooksEnv(self):
3887 "OP_TARGET": self.cfg.GetClusterName(),
3888 "NEW_VG_NAME": self.op.vg_name,
3891 def BuildHooksNodes(self):
3892 """Build hooks nodes.
3895 mn = self.cfg.GetMasterNode()
3898 def CheckPrereq(self):
3899 """Check prerequisites.
3901 This checks whether the given params don't conflict and
3902 if the given volume group is valid.
3905 if self.op.vg_name is not None and not self.op.vg_name:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3907 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3908 " instances exist", errors.ECODE_INVAL)
3910 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3911 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3912 raise errors.OpPrereqError("Cannot disable drbd helper while"
3913 " drbd-based instances exist",
3916 node_list = self.owned_locks(locking.LEVEL_NODE)
3918 # if vg_name not None, checks given volume group on all nodes
if self.op.vg_name:
3920 vglist = self.rpc.call_vg_list(node_list)
3921 for node in node_list:
3922 msg = vglist[node].fail_msg
if msg:
3924 # ignoring down node
3925 self.LogWarning("Error while gathering data on node %s"
3926 " (ignoring node): %s", node, msg)
continue
3928 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3930 constants.MIN_VG_SIZE)
if vgstatus:
3932 raise errors.OpPrereqError("Error on node '%s': %s" %
3933 (node, vgstatus), errors.ECODE_ENVIRON)
3935 if self.op.drbd_helper:
3936 # checks given drbd helper on all nodes
3937 helpers = self.rpc.call_drbd_helper(node_list)
3938 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
if ninfo.offline:
3940 self.LogInfo("Not checking drbd helper on offline node %s", node)
continue
3942 msg = helpers[node].fail_msg
if msg:
3944 raise errors.OpPrereqError("Error checking drbd helper on node"
3945 " '%s': %s" % (node, msg),
3946 errors.ECODE_ENVIRON)
3947 node_helper = helpers[node].payload
3948 if node_helper != self.op.drbd_helper:
3949 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3950 (node, node_helper), errors.ECODE_ENVIRON)
3952 self.cluster = cluster = self.cfg.GetClusterInfo()
3953 # validate params changes
3954 if self.op.beparams:
3955 objects.UpgradeBeParams(self.op.beparams)
3956 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3957 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3959 if self.op.ndparams:
3960 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3961 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3963 # TODO: we need a more general way to handle resetting
3964 # cluster-level parameters to default values
3965 if self.new_ndparams["oob_program"] == "":
3966 self.new_ndparams["oob_program"] = \
3967 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
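# Static hypervisor and disk state is merged with the currently recorded
# state and normalised here; it is stored on the cluster object in Exec.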
3969 if self.op.hv_state:
3970 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3971 self.cluster.hv_state_static)
3972 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3973 for hv, values in new_hv_state.items())
3975 if self.op.disk_state:
3976 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3977 self.cluster.disk_state_static)
3978 self.new_disk_state = \
3979 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3980 for name, values in svalues.items()))
3981 for storage, svalues in new_disk_state.items())
3984 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3987 all_instances = self.cfg.GetAllInstancesInfo().values()
3989 for group in self.cfg.GetAllNodeGroupsInfo().values():
3990 instances = frozenset([inst for inst in all_instances
3991 if compat.any(node in group.members
3992 for node in inst.all_nodes)])
3993 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3994 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3996 new_ipolicy, instances)
3998 violations.update(new)
4001 self.LogWarning("After the ipolicy change the following instances"
4002 " violate them: %s",
4003 utils.CommaJoin(utils.NiceSort(violations)))
4005 if self.op.nicparams:
4006 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4007 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4008 objects.NIC.CheckParameterSyntax(self.new_nicparams)
nic_errors = []
4011 # check all instances for consistency
4012 for instance in self.cfg.GetAllInstancesInfo().values():
4013 for nic_idx, nic in enumerate(instance.nics):
4014 params_copy = copy.deepcopy(nic.nicparams)
4015 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4017 # check parameter syntax
4019 objects.NIC.CheckParameterSyntax(params_filled)
4020 except errors.ConfigurationError, err:
4021 nic_errors.append("Instance %s, nic/%d: %s" %
4022 (instance.name, nic_idx, err))
4024 # if we're moving instances to routed, check that they have an ip
4025 target_mode = params_filled[constants.NIC_MODE]
4026 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4027 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4028 " address" % (instance.name, nic_idx))
if nic_errors:
4030 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4031 "\n".join(nic_errors))
4033 # hypervisor list/parameters
4034 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
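# Start from a copy of the current cluster hvparams; per-hypervisor dicts
# given in the opcode are merged into it (updated, not replaced) below.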
4035 if self.op.hvparams:
4036 for hv_name, hv_dict in self.op.hvparams.items():
4037 if hv_name not in self.new_hvparams:
4038 self.new_hvparams[hv_name] = hv_dict
else:
4040 self.new_hvparams[hv_name].update(hv_dict)
4042 # disk template parameters
4043 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4044 if self.op.diskparams:
4045 for dt_name, dt_params in self.op.diskparams.items():
4046 if dt_name not in self.new_diskparams:
4047 self.new_diskparams[dt_name] = dt_params
else:
4049 self.new_diskparams[dt_name].update(dt_params)
4051 # os hypervisor parameters
4052 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
if self.op.os_hvp:
4054 for os_name, hvs in self.op.os_hvp.items():
4055 if os_name not in self.new_os_hvp:
4056 self.new_os_hvp[os_name] = hvs
else:
4058 for hv_name, hv_dict in hvs.items():
4059 if hv_name not in self.new_os_hvp[os_name]:
4060 self.new_os_hvp[os_name][hv_name] = hv_dict
else:
4062 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4065 self.new_osp = objects.FillDict(cluster.osparams, {})
4066 if self.op.osparams:
4067 for os_name, osp in self.op.osparams.items():
4068 if os_name not in self.new_osp:
4069 self.new_osp[os_name] = {}
4071 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4074 if not self.new_osp[os_name]:
4075 # we removed all parameters
4076 del self.new_osp[os_name]
4078 # check the parameter validity (remote check)
4079 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4080 os_name, self.new_osp[os_name])
4082 # changes to the hypervisor list
4083 if self.op.enabled_hypervisors is not None:
4084 self.hv_list = self.op.enabled_hypervisors
4085 for hv in self.hv_list:
4086 # if the hypervisor doesn't already exist in the cluster
4087 # hvparams, we initialize it to empty, and then (in both
4088 # cases) we make sure to fill the defaults, as we might not
4089 # have a complete defaults list if the hypervisor wasn't
# enabled before
4091 if hv not in new_hvp:
new_hvp[hv] = {}
4093 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4094 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4096 self.hv_list = cluster.enabled_hypervisors
4098 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4099 # either the enabled list has changed, or the parameters have, validate
4100 for hv_name, hv_params in self.new_hvparams.items():
4101 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4102 (self.op.enabled_hypervisors and
4103 hv_name in self.op.enabled_hypervisors)):
4104 # either this is a new hypervisor, or its parameters have changed
4105 hv_class = hypervisor.GetHypervisor(hv_name)
4106 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4107 hv_class.CheckParameterSyntax(hv_params)
4108 _CheckHVParams(self, node_list, hv_name, hv_params)
4111 # no need to check any newly-enabled hypervisors, since the
4112 # defaults have already been checked in the above code-block
4113 for os_name, os_hvp in self.new_os_hvp.items():
4114 for hv_name, hv_params in os_hvp.items():
4115 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4116 # we need to fill in the new os_hvp on top of the actual hv_p
4117 cluster_defaults = self.new_hvparams.get(hv_name, {})
4118 new_osp = objects.FillDict(cluster_defaults, hv_params)
4119 hv_class = hypervisor.GetHypervisor(hv_name)
4120 hv_class.CheckParameterSyntax(new_osp)
4121 _CheckHVParams(self, node_list, hv_name, new_osp)
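# A default iallocator given by name must resolve to an existing script in
# the iallocator search path, otherwise the opcode is rejected.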
4123 if self.op.default_iallocator:
4124 alloc_script = utils.FindFile(self.op.default_iallocator,
4125 constants.IALLOCATOR_SEARCH_PATH,
4127 if alloc_script is None:
4128 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4129 " specified" % self.op.default_iallocator,
4132 def Exec(self, feedback_fn):
4133 """Change the parameters of the cluster.
4136 if self.op.vg_name is not None:
4137 new_volume = self.op.vg_name
4140 if new_volume != self.cfg.GetVGName():
4141 self.cfg.SetVGName(new_volume)
else:
4143 feedback_fn("Cluster LVM configuration already in desired"
4144 " state, not changing")
4145 if self.op.drbd_helper is not None:
4146 new_helper = self.op.drbd_helper
4149 if new_helper != self.cfg.GetDRBDHelper():
4150 self.cfg.SetDRBDHelper(new_helper)
else:
4152 feedback_fn("Cluster DRBD helper already in desired state,"
" not changing")
4154 if self.op.hvparams:
4155 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
4157 self.cluster.os_hvp = self.new_os_hvp
4158 if self.op.enabled_hypervisors is not None:
4159 self.cluster.hvparams = self.new_hvparams
4160 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4161 if self.op.beparams:
4162 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4163 if self.op.nicparams:
4164 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
4166 self.cluster.ipolicy = self.new_ipolicy
4167 if self.op.osparams:
4168 self.cluster.osparams = self.new_osp
4169 if self.op.ndparams:
4170 self.cluster.ndparams = self.new_ndparams
4171 if self.op.diskparams:
4172 self.cluster.diskparams = self.new_diskparams
4173 if self.op.hv_state:
4174 self.cluster.hv_state_static = self.new_hv_state
4175 if self.op.disk_state:
4176 self.cluster.disk_state_static = self.new_disk_state
4178 if self.op.candidate_pool_size is not None:
4179 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4180 # we need to update the pool size here, otherwise the save will fail
4181 _AdjustCandidatePool(self, [])
4183 if self.op.maintain_node_health is not None:
4184 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4185 feedback_fn("Note: CONFD was disabled at build time, node health"
4186 " maintenance is not useful (still enabling it)")
4187 self.cluster.maintain_node_health = self.op.maintain_node_health
4189 if self.op.prealloc_wipe_disks is not None:
4190 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4192 if self.op.add_uids is not None:
4193 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4195 if self.op.remove_uids is not None:
4196 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4198 if self.op.uid_pool is not None:
4199 self.cluster.uid_pool = self.op.uid_pool
4201 if self.op.default_iallocator is not None:
4202 self.cluster.default_iallocator = self.op.default_iallocator
4204 if self.op.reserved_lvs is not None:
4205 self.cluster.reserved_lvs = self.op.reserved_lvs
4207 if self.op.use_external_mip_script is not None:
4208 self.cluster.use_external_mip_script = self.op.use_external_mip_script
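# helper_os applies DDM_ADD/DDM_REMOVE modifications to the hidden and
# blacklisted OS lists kept on the cluster object.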
4210 def helper_os(aname, mods, desc):
4212 lst = getattr(self.cluster, aname)
4213 for key, val in mods:
4214 if key == constants.DDM_ADD:
4216 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4219 elif key == constants.DDM_REMOVE:
4223 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4225 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4227 if self.op.hidden_os:
4228 helper_os("hidden_os", self.op.hidden_os, "hidden")
4230 if self.op.blacklisted_os:
4231 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4233 if self.op.master_netdev:
4234 master_params = self.cfg.GetMasterNetworkParameters()
4235 ems = self.cfg.GetUseExternalMipScript()
4236 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4237 self.cluster.master_netdev)
4238 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4240 result.Raise("Could not disable the master ip")
4241 feedback_fn("Changing master_netdev from %s to %s" %
4242 (master_params.netdev, self.op.master_netdev))
4243 self.cluster.master_netdev = self.op.master_netdev
4245 if self.op.master_netmask:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4248 result = self.rpc.call_node_change_master_netmask(master_params.name,
4249 master_params.netmask,
4250 self.op.master_netmask,
4252 master_params.netdev)
if result.fail_msg:
4254 msg = "Could not change the master IP netmask: %s" % result.fail_msg
feedback_fn(msg)
4257 self.cluster.master_netmask = self.op.master_netmask
4259 self.cfg.Update(self.cluster, feedback_fn)
4261 if self.op.master_netdev:
4262 master_params = self.cfg.GetMasterNetworkParameters()
4263 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4264 self.op.master_netdev)
4265 ems = self.cfg.GetUseExternalMipScript()
4266 result = self.rpc.call_node_activate_master_ip(master_params.name,
4269 self.LogWarning("Could not re-enable the master ip on"
4270 " the master, please restart manually: %s",
4274 def _UploadHelper(lu, nodes, fname):
4275 """Helper for uploading a file and showing warnings.
4278 if os.path.exists(fname):
4279 result = lu.rpc.call_upload_file(nodes, fname)
4280 for to_node, to_result in result.items():
4281 msg = to_result.fail_msg
4283 msg = ("Copy of file %s to node %s failed: %s" %
4284 (fname, to_node, msg))
4285 lu.proc.LogWarning(msg)
4288 def _ComputeAncillaryFiles(cluster, redist):
4289 """Compute files external to Ganeti which need to be consistent.
4291 @type redist: boolean
4292 @param redist: Whether to include files which need to be redistributed
4295 # Compute files for all nodes
4297 constants.SSH_KNOWN_HOSTS_FILE,
4298 constants.CONFD_HMAC_KEY,
4299 constants.CLUSTER_DOMAIN_SECRET_FILE,
4300 constants.SPICE_CERT_FILE,
4301 constants.SPICE_CACERT_FILE,
4302 constants.RAPI_USERS_FILE,
4306 files_all.update(constants.ALL_CERT_FILES)
4307 files_all.update(ssconf.SimpleStore().GetFileList())
4309 # we need to ship at least the RAPI certificate
4310 files_all.add(constants.RAPI_CERT_FILE)
4312 if cluster.modify_etc_hosts:
4313 files_all.add(constants.ETC_HOSTS)
4315 if cluster.use_external_mip_script:
4316 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4318 # Files which are optional, these must:
4319 # - be present in one other category as well
4320 # - either exist or not exist on all nodes of that category (mc, vm all)
4322 constants.RAPI_USERS_FILE,
4325 # Files which should only be on master candidates
4329 files_mc.add(constants.CLUSTER_CONF_FILE)
4331 # Files which should only be on VM-capable nodes
4332 files_vm = set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4336 files_opt |= set(filename
4337 for hv_name in cluster.enabled_hypervisors
4338 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4340 # Filenames in each category must be unique
4341 all_files_set = files_all | files_mc | files_vm
4342 assert (len(all_files_set) ==
4343 sum(map(len, [files_all, files_mc, files_vm]))), \
4344 "Found file listed in more than one file list"
4346 # Optional files must be present in one other category
4347 assert all_files_set.issuperset(files_opt), \
4348 "Optional file not in a different required list"
4350 return (files_all, files_opt, files_mc, files_vm)
4353 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4354 """Distribute additional files which are part of the cluster configuration.
4356 ConfigWriter takes care of distributing the config and ssconf files, but
4357 there are more files which should be distributed to all nodes. This function
4358 makes sure those are copied.
4360 @param lu: calling logical unit
4361 @param additional_nodes: list of nodes not in the config to distribute to
4362 @type additional_vm: boolean
4363 @param additional_vm: whether the additional nodes are vm-capable or not
4366 # Gather target nodes
4367 cluster = lu.cfg.GetClusterInfo()
4368 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4370 online_nodes = lu.cfg.GetOnlineNodeList()
4371 online_set = frozenset(online_nodes)
4372 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4374 if additional_nodes is not None:
4375 online_nodes.extend(additional_nodes)
4377 vm_nodes.extend(additional_nodes)
4379 # Never distribute to master node
4380 for nodelist in [online_nodes, vm_nodes]:
4381 if master_info.name in nodelist:
4382 nodelist.remove(master_info.name)
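# The cluster configuration itself is distributed by the config writer and
# master candidate files are not handled here; only the remaining
# ancillary files are pushed below.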
4385 (files_all, _, files_mc, files_vm) = \
4386 _ComputeAncillaryFiles(cluster, True)
4388 # Never re-distribute configuration file from here
4389 assert not (constants.CLUSTER_CONF_FILE in files_all or
4390 constants.CLUSTER_CONF_FILE in files_vm)
4391 assert not files_mc, "Master candidates not handled in this function"
4394 (online_nodes, files_all),
4395 (vm_nodes, files_vm),
4399 for (node_list, files) in filemap:
4401 _UploadHelper(lu, node_list, fname)
4404 class LUClusterRedistConf(NoHooksLU):
4405 """Force the redistribution of cluster configuration.
4407 This is a very simple LU.
4412 def ExpandNames(self):
4413 self.needed_locks = {
4414 locking.LEVEL_NODE: locking.ALL_SET,
4416 self.share_locks[locking.LEVEL_NODE] = 1
4418 def Exec(self, feedback_fn):
4419 """Redistribute the configuration.
4422 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4423 _RedistributeAncillaryFiles(self)
4426 class LUClusterActivateMasterIp(NoHooksLU):
4427 """Activate the master IP on the master node.
4430 def Exec(self, feedback_fn):
4431 """Activate the master IP.
4434 master_params = self.cfg.GetMasterNetworkParameters()
4435 ems = self.cfg.GetUseExternalMipScript()
4436 result = self.rpc.call_node_activate_master_ip(master_params.name,
4438 result.Raise("Could not activate the master IP")
4441 class LUClusterDeactivateMasterIp(NoHooksLU):
4442 """Deactivate the master IP on the master node.
4445 def Exec(self, feedback_fn):
4446 """Deactivate the master IP.
4449 master_params = self.cfg.GetMasterNetworkParameters()
4450 ems = self.cfg.GetUseExternalMipScript()
4451 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4453 result.Raise("Could not deactivate the master IP")
4456 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4457 """Sleep and poll for an instance's disk to sync.
4460 if not instance.disks or disks is not None and not disks:
4463 disks = _ExpandCheckDisks(instance, disks)
4466 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4468 node = instance.primary_node
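# Mirror status is always polled on the instance's primary node, where the
# disks are assembled.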
4471 lu.cfg.SetDiskID(dev, node)
4473 # TODO: Convert to utils.Retry
4476 degr_retries = 10 # in seconds, as we sleep 1 second each time
4480 cumul_degraded = False
4481 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4482 msg = rstats.fail_msg
4484 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4487 raise errors.RemoteError("Can't contact node %s for mirror data,"
4488 " aborting." % node)
4491 rstats = rstats.payload
4493 for i, mstat in enumerate(rstats):
4495 lu.LogWarning("Can't compute data for node %s/%s",
4496 node, disks[i].iv_name)
4499 cumul_degraded = (cumul_degraded or
4500 (mstat.is_degraded and mstat.sync_percent is None))
4501 if mstat.sync_percent is not None:
4503 if mstat.estimated_time is not None:
4504 rem_time = ("%s remaining (estimated)" %
4505 utils.FormatSeconds(mstat.estimated_time))
4506 max_time = mstat.estimated_time
4508 rem_time = "no time estimate"
4509 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4510 (disks[i].iv_name, mstat.sync_percent, rem_time))
4512 # if we're done but degraded, let's do a few small retries, to
4513 # make sure we see a stable and not transient situation; therefore
4514 # we force restart of the loop
4515 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4516 logging.info("Degraded disks found, %d retries left", degr_retries)
4524 time.sleep(min(60, max_time))
4527 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4528 return not cumul_degraded
4531 def _BlockdevFind(lu, node, dev, instance):
4532 """Wrapper around call_blockdev_find to annotate diskparams.
4534 @param lu: A reference to the lu object
4535 @param node: The node to call out
4536 @param dev: The device to find
4537 @param instance: The instance object the device belongs to
4538 @returns The result of the rpc call
4541 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4542 return lu.rpc.call_blockdev_find(node, disk)
4545 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4546 """Wrapper around L{_CheckDiskConsistencyInner}.
4549 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4550 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4554 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4556 """Check that mirrors are not degraded.
4558 @attention: The device has to be annotated already.
4560 The ldisk parameter, if True, will change the test from the
4561 is_degraded attribute (which represents overall non-ok status for
4562 the device(s)) to the ldisk (representing the local storage status).
4565 lu.cfg.SetDiskID(dev, node)
result = True
4569 if on_primary or dev.AssembleOnSecondary():
4570 rstats = lu.rpc.call_blockdev_find(node, dev)
4571 msg = rstats.fail_msg
4573 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4575 elif not rstats.payload:
4576 lu.LogWarning("Can't find disk on node %s", node)
4580 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4582 result = result and not rstats.payload.is_degraded
4585 for child in dev.children:
4586 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4592 class LUOobCommand(NoHooksLU):
4593 """Logical unit for OOB handling.
4597 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4599 def ExpandNames(self):
4600 """Gather locks we need.
4603 if self.op.node_names:
4604 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4605 lock_names = self.op.node_names
4607 lock_names = locking.ALL_SET
4609 self.needed_locks = {
4610 locking.LEVEL_NODE: lock_names,
4613 def CheckPrereq(self):
4614 """Check prerequisites.
4617 - the node exists in the configuration
4620 Any errors are signaled by raising errors.OpPrereqError.
4624 self.master_node = self.cfg.GetMasterNode()
4626 assert self.op.power_delay >= 0.0
4628 if self.op.node_names:
4629 if (self.op.command in self._SKIP_MASTER and
4630 self.master_node in self.op.node_names):
4631 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4632 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4634 if master_oob_handler:
4635 additional_text = ("run '%s %s %s' if you want to operate on the"
4636 " master regardless") % (master_oob_handler,
4640 additional_text = "it does not support out-of-band operations"
4642 raise errors.OpPrereqError(("Operating on the master node %s is not"
4643 " allowed for %s; %s") %
4644 (self.master_node, self.op.command,
4645 additional_text), errors.ECODE_INVAL)
4647 self.op.node_names = self.cfg.GetNodeList()
4648 if self.op.command in self._SKIP_MASTER:
4649 self.op.node_names.remove(self.master_node)
4651 if self.op.command in self._SKIP_MASTER:
4652 assert self.master_node not in self.op.node_names
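# Resolve the node objects; powering off a node that is not marked offline
# is refused unless ignore_status is set.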
4654 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4656 raise errors.OpPrereqError("Node %s not found" % node_name,
4659 self.nodes.append(node)
4661 if (not self.op.ignore_status and
4662 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4663 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4664 " not marked offline") % node_name,
4667 def Exec(self, feedback_fn):
4668 """Execute OOB and return result if we expect any.
4671 master_node = self.master_node
4674 for idx, node in enumerate(utils.NiceSort(self.nodes,
4675 key=lambda node: node.name)):
4676 node_entry = [(constants.RS_NORMAL, node.name)]
4677 ret.append(node_entry)
4679 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
4682 node_entry.append((constants.RS_UNAVAIL, None))
continue
4685 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4686 self.op.command, oob_program, node.name)
4687 result = self.rpc.call_run_oob(master_node, oob_program,
4688 self.op.command, node.name,
4692 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4693 node.name, result.fail_msg)
4694 node_entry.append((constants.RS_NODATA, None))
4697 self._CheckPayload(result)
4698 except errors.OpExecError, err:
4699 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4701 node_entry.append((constants.RS_NODATA, None))
4703 if self.op.command == constants.OOB_HEALTH:
4704 # For health we should log important events
4705 for item, status in result.payload:
4706 if status in [constants.OOB_STATUS_WARNING,
4707 constants.OOB_STATUS_CRITICAL]:
4708 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4709 item, node.name, status)
4711 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
4713 elif self.op.command == constants.OOB_POWER_OFF:
4714 node.powered = False
4715 elif self.op.command == constants.OOB_POWER_STATUS:
4716 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4717 if powered != node.powered:
4718 logging.warning(("Recorded power state (%s) of node '%s' does not"
4719 " match actual power state (%s)"), node.powered,
4722 # For configuration changing commands we should update the node
4723 if self.op.command in (constants.OOB_POWER_ON,
4724 constants.OOB_POWER_OFF):
4725 self.cfg.Update(node, feedback_fn)
4727 node_entry.append((constants.RS_NORMAL, result.payload))
4729 if (self.op.command == constants.OOB_POWER_ON and
4730 idx < len(self.nodes) - 1):
4731 time.sleep(self.op.power_delay)
4735 def _CheckPayload(self, result):
4736 """Checks if the payload is valid.
4738 @param result: RPC result
4739 @raises errors.OpExecError: If payload is not valid
4743 if self.op.command == constants.OOB_HEALTH:
4744 if not isinstance(result.payload, list):
4745 errs.append("command 'health' is expected to return a list but got %s" %
4746 type(result.payload))
4748 for item, status in result.payload:
4749 if status not in constants.OOB_STATUSES:
4750 errs.append("health item '%s' has invalid status '%s'" %
4753 if self.op.command == constants.OOB_POWER_STATUS:
4754 if not isinstance(result.payload, dict):
4755 errs.append("power-status is expected to return a dict but got %s" %
4756 type(result.payload))
4758 if self.op.command in [
4759 constants.OOB_POWER_ON,
4760 constants.OOB_POWER_OFF,
4761 constants.OOB_POWER_CYCLE,
4763 if result.payload is not None:
4764 errs.append("%s is expected to not return payload but got '%s'" %
4765 (self.op.command, result.payload))
4768 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4769 utils.CommaJoin(errs))
4772 class _OsQuery(_QueryBase):
4773 FIELDS = query.OS_FIELDS
4775 def ExpandNames(self, lu):
4776 # Lock all nodes in shared mode
4777 # Temporary removal of locks, should be reverted later
4778 # TODO: reintroduce locks when they are lighter-weight
4779 lu.needed_locks = {}
4780 #self.share_locks[locking.LEVEL_NODE] = 1
4781 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4783 # The following variables interact with _QueryBase._GetNames
4785 self.wanted = self.names
4787 self.wanted = locking.ALL_SET
4789 self.do_locking = self.use_locking
4791 def DeclareLocks(self, lu, level):
4795 def _DiagnoseByOS(rlist):
4796 """Remaps a per-node return list into a per-os per-node dictionary
4798 @param rlist: a map with node names as keys and OS objects as values
4801 @return: a dictionary with osnames as keys and as value another
4802 map, with nodes as keys and tuples of (path, status, diagnose,
4803 variants, parameters, api_versions) as values, eg::
4805 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4806 (/srv/..., False, "invalid api")],
4807 "node2": [(/srv/..., True, "", [], [])]}
4812 # we build here the list of nodes that didn't fail the RPC (at RPC
4813 # level), so that nodes with a non-responding node daemon don't
4814 # make all OSes invalid
4815 good_nodes = [node_name for node_name in rlist
4816 if not rlist[node_name].fail_msg]
4817 for node_name, nr in rlist.items():
4818 if nr.fail_msg or not nr.payload:
4820 for (name, path, status, diagnose, variants,
4821 params, api_versions) in nr.payload:
4822 if name not in all_os:
4823 # build a list of nodes for this os containing empty lists
4824 # for each node in node_list
4826 for nname in good_nodes:
4827 all_os[name][nname] = []
4828 # convert params from [name, help] to (name, help)
4829 params = [tuple(v) for v in params]
4830 all_os[name][node_name].append((path, status, diagnose,
4831 variants, params, api_versions))
4834 def _GetQueryData(self, lu):
4835 """Computes the list of nodes and their attributes.
4838 # Locking is not used
4839 assert not (compat.any(lu.glm.is_owned(level)
4840 for level in locking.LEVELS
4841 if level != locking.LEVEL_CLUSTER) or
4842 self.do_locking or self.use_locking)
4844 valid_nodes = [node.name
4845 for node in lu.cfg.GetAllNodesInfo().values()
4846 if not node.offline and node.vm_capable]
4847 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4848 cluster = lu.cfg.GetClusterInfo()
4852 for (os_name, os_data) in pol.items():
4853 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4854 hidden=(os_name in cluster.hidden_os),
4855 blacklisted=(os_name in cluster.blacklisted_os))
4859 api_versions = set()
4861 for idx, osl in enumerate(os_data.values()):
4862 info.valid = bool(info.valid and osl and osl[0][1])
4866 (node_variants, node_params, node_api) = osl[0][3:6]
4869 variants.update(node_variants)
4870 parameters.update(node_params)
4871 api_versions.update(node_api)
4873 # Filter out inconsistent values
4874 variants.intersection_update(node_variants)
4875 parameters.intersection_update(node_params)
4876 api_versions.intersection_update(node_api)
4878 info.variants = list(variants)
4879 info.parameters = list(parameters)
4880 info.api_versions = list(api_versions)
4882 data[os_name] = info
4884 # Prepare data in requested order
4885 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4889 class LUOsDiagnose(NoHooksLU):
4890 """Logical unit for OS diagnose/query.
4896 def _BuildFilter(fields, names):
4897 """Builds a filter for querying OSes.
4900 name_filter = qlang.MakeSimpleFilter("name", names)
4902 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4903 # respective field is not requested
4904 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4905 for fname in ["hidden", "blacklisted"]
4906 if fname not in fields]
4907 if "valid" not in fields:
4908 status_filter.append([qlang.OP_TRUE, "valid"])
4911 status_filter.insert(0, qlang.OP_AND)
4913 status_filter = None
4915 if name_filter and status_filter:
4916 return [qlang.OP_AND, name_filter, status_filter]
4920 return status_filter
4922 def CheckArguments(self):
4923 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4924 self.op.output_fields, False)
4926 def ExpandNames(self):
4927 self.oq.ExpandNames(self)
4929 def Exec(self, feedback_fn):
4930 return self.oq.OldStyleQuery(self)
4933 class LUNodeRemove(LogicalUnit):
4934 """Logical unit for removing a node.
4937 HPATH = "node-remove"
4938 HTYPE = constants.HTYPE_NODE
4940 def BuildHooksEnv(self):
4945 "OP_TARGET": self.op.node_name,
4946 "NODE_NAME": self.op.node_name,
4949 def BuildHooksNodes(self):
4950 """Build hooks nodes.
4952 This doesn't run on the target node in the pre phase as a failed
4953 node would then be impossible to remove.
4956 all_nodes = self.cfg.GetNodeList()
4958 all_nodes.remove(self.op.node_name)
4961 return (all_nodes, all_nodes)
4963 def CheckPrereq(self):
4964 """Check prerequisites.
4967 - the node exists in the configuration
4968 - it does not have primary or secondary instances
4969 - it's not the master
4971 Any errors are signaled by raising errors.OpPrereqError.
4974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4975 node = self.cfg.GetNodeInfo(self.op.node_name)
4976 assert node is not None
4978 masternode = self.cfg.GetMasterNode()
4979 if node.name == masternode:
4980 raise errors.OpPrereqError("Node is the master node, failover to another"
4981 " node is required", errors.ECODE_INVAL)
4983 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4984 if node.name in instance.all_nodes:
4985 raise errors.OpPrereqError("Instance %s is still running on the node,"
4986 " please remove it first" % instance_name,
4988 self.op.node_name = node.name
4991 def Exec(self, feedback_fn):
4992 """Removes the node from the cluster.
"""
node = self.cfg.GetNodeInfo(self.op.node_name)
4996 logging.info("Stopping the node daemon and removing configs from node %s",
node.name)
4999 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5001 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5004 # Promote nodes to master candidate as needed
5005 _AdjustCandidatePool(self, exceptions=[node.name])
5006 self.context.RemoveNode(node.name)
5008 # Run post hooks on the node before it's removed
5009 _RunPostHook(self, node.name)
5011 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5012 msg = result.fail_msg
5014 self.LogWarning("Errors encountered on the remote node while leaving"
5015 " the cluster: %s", msg)
5017 # Remove node from our /etc/hosts
5018 if self.cfg.GetClusterInfo().modify_etc_hosts:
5019 master_node = self.cfg.GetMasterNode()
5020 result = self.rpc.call_etc_hosts_modify(master_node,
5021 constants.ETC_HOSTS_REMOVE,
5023 result.Raise("Can't update hosts file with new host data")
5024 _RedistributeAncillaryFiles(self)
5027 class _NodeQuery(_QueryBase):
5028 FIELDS = query.NODE_FIELDS
5030 def ExpandNames(self, lu):
5031 lu.needed_locks = {}
5032 lu.share_locks = _ShareAll()
5035 self.wanted = _GetWantedNodes(lu, self.names)
5037 self.wanted = locking.ALL_SET
5039 self.do_locking = (self.use_locking and
5040 query.NQ_LIVE in self.requested_data)
5043 # If any non-static field is requested we need to lock the nodes
5044 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5046 def DeclareLocks(self, lu, level):
5049 def _GetQueryData(self, lu):
5050 """Computes the list of nodes and their attributes.
5053 all_info = lu.cfg.GetAllNodesInfo()
5055 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5057 # Gather data as requested
5058 if query.NQ_LIVE in self.requested_data:
5059 # filter out non-vm_capable nodes
5060 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5062 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5063 [lu.cfg.GetHypervisorType()])
5064 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5065 for (name, nresult) in node_data.items()
5066 if not nresult.fail_msg and nresult.payload)
5070 if query.NQ_INST in self.requested_data:
5071 node_to_primary = dict([(name, set()) for name in nodenames])
5072 node_to_secondary = dict([(name, set()) for name in nodenames])
5074 inst_data = lu.cfg.GetAllInstancesInfo()
5076 for inst in inst_data.values():
5077 if inst.primary_node in node_to_primary:
5078 node_to_primary[inst.primary_node].add(inst.name)
5079 for secnode in inst.secondary_nodes:
5080 if secnode in node_to_secondary:
5081 node_to_secondary[secnode].add(inst.name)
5083 node_to_primary = None
5084 node_to_secondary = None
5086 if query.NQ_OOB in self.requested_data:
5087 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5088 for name, node in all_info.iteritems())
5092 if query.NQ_GROUP in self.requested_data:
5093 groups = lu.cfg.GetAllNodeGroupsInfo()
5097 return query.NodeQueryData([all_info[name] for name in nodenames],
5098 live_data, lu.cfg.GetMasterNode(),
5099 node_to_primary, node_to_secondary, groups,
5100 oob_support, lu.cfg.GetClusterInfo())
5103 class LUNodeQuery(NoHooksLU):
5104 """Logical unit for querying nodes.
5107 # pylint: disable=W0142
5110 def CheckArguments(self):
5111 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5112 self.op.output_fields, self.op.use_locking)
5114 def ExpandNames(self):
5115 self.nq.ExpandNames(self)
5117 def DeclareLocks(self, level):
5118 self.nq.DeclareLocks(self, level)
5120 def Exec(self, feedback_fn):
5121 return self.nq.OldStyleQuery(self)
5124 class LUNodeQueryvols(NoHooksLU):
5125 """Logical unit for getting volumes on node(s).
5129 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5130 _FIELDS_STATIC = utils.FieldSet("node")
5132 def CheckArguments(self):
5133 _CheckOutputFields(static=self._FIELDS_STATIC,
5134 dynamic=self._FIELDS_DYNAMIC,
5135 selected=self.op.output_fields)
5137 def ExpandNames(self):
5138 self.share_locks = _ShareAll()
5139 self.needed_locks = {}
5141 if not self.op.nodes:
5142 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144 self.needed_locks[locking.LEVEL_NODE] = \
5145 _GetWantedNodes(self, self.op.nodes)
5147 def Exec(self, feedback_fn):
5148 """Computes the list of nodes and their attributes.
5151 nodenames = self.owned_locks(locking.LEVEL_NODE)
5152 volumes = self.rpc.call_node_volumes(nodenames)
5154 ilist = self.cfg.GetAllInstancesInfo()
5155 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5158 for node in nodenames:
5159 nresult = volumes[node]
5162 msg = nresult.fail_msg
5164 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5167 node_vols = sorted(nresult.payload,
5168 key=operator.itemgetter("dev"))
5170 for vol in node_vols:
5172 for field in self.op.output_fields:
5175 elif field == "phys":
5179 elif field == "name":
5181 elif field == "size":
5182 val = int(float(vol["size"]))
5183 elif field == "instance":
5184 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5186 raise errors.ParameterError(field)
5187 node_output.append(str(val))
5189 output.append(node_output)
5194 class LUNodeQueryStorage(NoHooksLU):
5195 """Logical unit for getting information on storage units on node(s).
5198 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5201 def CheckArguments(self):
5202 _CheckOutputFields(static=self._FIELDS_STATIC,
5203 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5204 selected=self.op.output_fields)
5206 def ExpandNames(self):
5207 self.share_locks = _ShareAll()
5208 self.needed_locks = {}
5211 self.needed_locks[locking.LEVEL_NODE] = \
5212 _GetWantedNodes(self, self.op.nodes)
5214 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5216 def Exec(self, feedback_fn):
5217 """Computes the list of nodes and their attributes.
5220 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5222 # Always get name to sort by
5223 if constants.SF_NAME in self.op.output_fields:
5224 fields = self.op.output_fields[:]
5226 fields = [constants.SF_NAME] + self.op.output_fields
5228 # Never ask for node or type as it's only known to the LU
5229 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5230 while extra in fields:
5231 fields.remove(extra)
5233 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5234 name_idx = field_idx[constants.SF_NAME]
5236 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5237 data = self.rpc.call_storage_list(self.nodes,
5238 self.op.storage_type, st_args,
5239 self.op.name, fields)
5243 for node in utils.NiceSort(self.nodes):
5244 nresult = data[node]
5248 msg = nresult.fail_msg
5250 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5253 rows = dict([(row[name_idx], row) for row in nresult.payload])
5255 for name in utils.NiceSort(rows.keys()):
5260 for field in self.op.output_fields:
5261 if field == constants.SF_NODE:
5263 elif field == constants.SF_TYPE:
5264 val = self.op.storage_type
5265 elif field in field_idx:
5266 val = row[field_idx[field]]
5268 raise errors.ParameterError(field)
5277 class _InstanceQuery(_QueryBase):
5278 FIELDS = query.INSTANCE_FIELDS
5280 def ExpandNames(self, lu):
5281 lu.needed_locks = {}
5282 lu.share_locks = _ShareAll()
5285 self.wanted = _GetWantedInstances(lu, self.names)
5287 self.wanted = locking.ALL_SET
5289 self.do_locking = (self.use_locking and
5290 query.IQ_LIVE in self.requested_data)
5292 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5294 lu.needed_locks[locking.LEVEL_NODE] = []
5295 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5297 self.do_grouplocks = (self.do_locking and
5298 query.IQ_NODES in self.requested_data)
5300 def DeclareLocks(self, lu, level):
5302 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5303 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5305 # Lock all groups used by instances optimistically; this requires going
5306 # via the node before it's locked, requiring verification later on
5307 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5309 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5310 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5311 elif level == locking.LEVEL_NODE:
5312 lu._LockInstancesNodes() # pylint: disable=W0212
5315 def _CheckGroupLocks(lu):
5316 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5317 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5319 # Check if node groups for locked instances are still correct
5320 for instance_name in owned_instances:
5321 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5323 def _GetQueryData(self, lu):
5324 """Computes the list of instances and their attributes.
5327 if self.do_grouplocks:
5328 self._CheckGroupLocks(lu)
5330 cluster = lu.cfg.GetClusterInfo()
5331 all_info = lu.cfg.GetAllInstancesInfo()
5333 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5335 instance_list = [all_info[name] for name in instance_names]
5336 nodes = frozenset(itertools.chain(*(inst.all_nodes
5337 for inst in instance_list)))
5338 hv_list = list(set([inst.hypervisor for inst in instance_list]))
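# Instances reported as running on a node other than their configured
# primary node are collected in wrongnode_inst.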
5341 wrongnode_inst = set()
5343 # Gather data as requested
5344 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5346 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5348 result = node_data[name]
5350 # offline nodes will be in both lists
5351 assert result.fail_msg
5352 offline_nodes.append(name)
5354 bad_nodes.append(name)
5355 elif result.payload:
5356 for inst in result.payload:
5357 if inst in all_info:
5358 if all_info[inst].primary_node == name:
5359 live_data.update(result.payload)
5361 wrongnode_inst.add(inst)
5363 # orphan instance; we don't list it here as we don't
5364 # handle this case yet in the output of instance listing
5365 logging.warning("Orphan instance '%s' found on node %s",
5367 # else no instance is alive
5371 if query.IQ_DISKUSAGE in self.requested_data:
5372 disk_usage = dict((inst.name,
5373 _ComputeDiskSize(inst.disk_template,
5374 [{constants.IDISK_SIZE: disk.size}
5375 for disk in inst.disks]))
5376 for inst in instance_list)
5380 if query.IQ_CONSOLE in self.requested_data:
5382 for inst in instance_list:
5383 if inst.name in live_data:
5384 # Instance is running
5385 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5387 consinfo[inst.name] = None
5388 assert set(consinfo.keys()) == set(instance_names)
5392 if query.IQ_NODES in self.requested_data:
5393 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5395 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5396 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5397 for uuid in set(map(operator.attrgetter("group"),
5403 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5404 disk_usage, offline_nodes, bad_nodes,
5405 live_data, wrongnode_inst, consinfo,
5409 class LUQuery(NoHooksLU):
5410 """Query for resources/items of a certain kind.
5413 # pylint: disable=W0142
5416 def CheckArguments(self):
5417 qcls = _GetQueryImplementation(self.op.what)
5419 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5421 def ExpandNames(self):
5422 self.impl.ExpandNames(self)
5424 def DeclareLocks(self, level):
5425 self.impl.DeclareLocks(self, level)
5427 def Exec(self, feedback_fn):
5428 return self.impl.NewStyleQuery(self)
5431 class LUQueryFields(NoHooksLU):
5432 """Query for resources/items of a certain kind.
5435 # pylint: disable=W0142
5438 def CheckArguments(self):
5439 self.qcls = _GetQueryImplementation(self.op.what)
5441 def ExpandNames(self):
5442 self.needed_locks = {}
5444 def Exec(self, feedback_fn):
5445 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5448 class LUNodeModifyStorage(NoHooksLU):
5449 """Logical unit for modifying a storage volume on a node.
5454 def CheckArguments(self):
5455 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5457 storage_type = self.op.storage_type
5460 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5462 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5463 " modified" % storage_type,
5466 diff = set(self.op.changes.keys()) - modifiable
5468 raise errors.OpPrereqError("The following fields can not be modified for"
5469 " storage units of type '%s': %r" %
5470 (storage_type, list(diff)),
5473 def ExpandNames(self):
5474 self.needed_locks = {
5475 locking.LEVEL_NODE: self.op.node_name,
5478 def Exec(self, feedback_fn):
5479 """Computes the list of nodes and their attributes.
5482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5483 result = self.rpc.call_storage_modify(self.op.node_name,
5484 self.op.storage_type, st_args,
5485 self.op.name, self.op.changes)
5486 result.Raise("Failed to modify storage unit '%s' on %s" %
5487 (self.op.name, self.op.node_name))
5490 class LUNodeAdd(LogicalUnit):
5491 """Logical unit for adding node to the cluster.
5495 HTYPE = constants.HTYPE_NODE
5496 _NFLAGS = ["master_capable", "vm_capable"]
5498 def CheckArguments(self):
5499 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5500 # validate/normalize the node name
5501 self.hostname = netutils.GetHostname(name=self.op.node_name,
5502 family=self.primary_ip_family)
5503 self.op.node_name = self.hostname.name
5505 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5506 raise errors.OpPrereqError("Cannot readd the master node",
5509 if self.op.readd and self.op.group:
5510 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5511 " being readded", errors.ECODE_INVAL)
5513 def BuildHooksEnv(self):
5516 This will run on all nodes before, and on all nodes + the new node after.
5520 "OP_TARGET": self.op.node_name,
5521 "NODE_NAME": self.op.node_name,
5522 "NODE_PIP": self.op.primary_ip,
5523 "NODE_SIP": self.op.secondary_ip,
5524 "MASTER_CAPABLE": str(self.op.master_capable),
5525 "VM_CAPABLE": str(self.op.vm_capable),
5528 def BuildHooksNodes(self):
5529 """Build hooks nodes.
5532 # Exclude added node
5533 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5534 post_nodes = pre_nodes + [self.op.node_name, ]
5536 return (pre_nodes, post_nodes)
5538 def CheckPrereq(self):
5539 """Check prerequisites.
5542 - the new node is not already in the config
5544 - its parameters (single/dual homed) matches the cluster
5546 Any errors are signaled by raising errors.OpPrereqError.
5550 hostname = self.hostname
5551 node = hostname.name
5552 primary_ip = self.op.primary_ip = hostname.ip
5553 if self.op.secondary_ip is None:
5554 if self.primary_ip_family == netutils.IP6Address.family:
5555 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5556 " IPv4 address must be given as secondary",
5558 self.op.secondary_ip = primary_ip
5560 secondary_ip = self.op.secondary_ip
5561 if not netutils.IP4Address.IsValid(secondary_ip):
5562 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5563 " address" % secondary_ip, errors.ECODE_INVAL)
5565 node_list = cfg.GetNodeList()
5566 if not self.op.readd and node in node_list:
5567 raise errors.OpPrereqError("Node %s is already in the configuration" %
5568 node, errors.ECODE_EXISTS)
5569 elif self.op.readd and node not in node_list:
5570 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5573 self.changed_primary_ip = False
5575 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5576 if self.op.readd and node == existing_node_name:
5577 if existing_node.secondary_ip != secondary_ip:
5578 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5579 " address configuration as before",
5581 if existing_node.primary_ip != primary_ip:
5582 self.changed_primary_ip = True
5586 if (existing_node.primary_ip == primary_ip or
5587 existing_node.secondary_ip == primary_ip or
5588 existing_node.primary_ip == secondary_ip or
5589 existing_node.secondary_ip == secondary_ip):
5590 raise errors.OpPrereqError("New node ip address(es) conflict with"
5591 " existing node %s" % existing_node.name,
5592 errors.ECODE_NOTUNIQUE)
5594 # After this 'if' block, None is no longer a valid value for the
5595 # _capable op attributes
5597 old_node = self.cfg.GetNodeInfo(node)
5598 assert old_node is not None, "Can't retrieve locked node %s" % node
5599 for attr in self._NFLAGS:
5600 if getattr(self.op, attr) is None:
5601 setattr(self.op, attr, getattr(old_node, attr))
5603 for attr in self._NFLAGS:
5604 if getattr(self.op, attr) is None:
5605 setattr(self.op, attr, True)
5607 if self.op.readd and not self.op.vm_capable:
5608 pri, sec = cfg.GetNodeInstances(node)
5610 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5611 " flag set to false, but it already holds"
5612 " instances" % node,
5615 # check that the type of the node (single versus dual homed) is the
5616 # same as for the master
5617 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5618 master_singlehomed = myself.secondary_ip == myself.primary_ip
5619 newbie_singlehomed = secondary_ip == primary_ip
5620 if master_singlehomed != newbie_singlehomed:
5621 if master_singlehomed:
5622 raise errors.OpPrereqError("The master has no secondary ip but the"
5623 " new node has one",
5626 raise errors.OpPrereqError("The master has a secondary ip but the"
5627 " new node doesn't have one",
5630 # checks reachability
5631 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5632 raise errors.OpPrereqError("Node not reachable by ping",
5633 errors.ECODE_ENVIRON)
5635 if not newbie_singlehomed:
5636 # check reachability from my secondary ip to newbie's secondary ip
5637 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5638 source=myself.secondary_ip):
5639 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5640 " based ping to node daemon port",
5641 errors.ECODE_ENVIRON)
5648 if self.op.master_capable:
5649 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5651 self.master_candidate = False
5654 self.new_node = old_node
5656 node_group = cfg.LookupNodeGroup(self.op.group)
5657 self.new_node = objects.Node(name=node,
5658 primary_ip=primary_ip,
5659 secondary_ip=secondary_ip,
5660 master_candidate=self.master_candidate,
5661 offline=False, drained=False,
5664 if self.op.ndparams:
5665 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5667 if self.op.hv_state:
5668 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5670 if self.op.disk_state:
5671 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5673 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5674 # it a property on the base class.
5675 result = rpc.DnsOnlyRunner().call_version([node])[node]
5676 result.Raise("Can't get version information from node %s" % node)
5677 if constants.PROTOCOL_VERSION == result.payload:
5678 logging.info("Communication to node %s fine, sw version %s match",
5679 node, result.payload)
5681 raise errors.OpPrereqError("Version mismatch master version %s,"
5682 " node version %s" %
5683 (constants.PROTOCOL_VERSION, result.payload),
5684 errors.ECODE_ENVIRON)
5686 def Exec(self, feedback_fn):
5687 """Adds the new node to the cluster.
5690 new_node = self.new_node
5691 node = new_node.name
5693 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5696 # We are adding a new node, so we assume it is powered
5697 new_node.powered = True
5699 # for re-adds, reset the offline/drained/master-candidate flags;
5700 # we need to reset here, otherwise offline would prevent RPC calls
5701 # later in the procedure; this also means that if the re-add
5702 # fails, we are left with a non-offlined, broken node
5704 new_node.drained = new_node.offline = False # pylint: disable=W0201
5705 self.LogInfo("Readding a node, the offline/drained flags were reset")
5706 # if we demote the node, we do cleanup later in the procedure
5707 new_node.master_candidate = self.master_candidate
5708 if self.changed_primary_ip:
5709 new_node.primary_ip = self.op.primary_ip
5711 # copy the master/vm_capable flags
5712 for attr in self._NFLAGS:
5713 setattr(new_node, attr, getattr(self.op, attr))
5715 # notify the user about any possible mc promotion
5716 if new_node.master_candidate:
5717 self.LogInfo("Node will be a master candidate")
5719 if self.op.ndparams:
5720 new_node.ndparams = self.op.ndparams
5722 new_node.ndparams = {}
5724 if self.op.hv_state:
5725 new_node.hv_state_static = self.new_hv_state
5727 if self.op.disk_state:
5728 new_node.disk_state_static = self.new_disk_state
5730 # Add node to our /etc/hosts, and add key to known_hosts
5731 if self.cfg.GetClusterInfo().modify_etc_hosts:
5732 master_node = self.cfg.GetMasterNode()
5733 result = self.rpc.call_etc_hosts_modify(master_node,
5734 constants.ETC_HOSTS_ADD,
5737 result.Raise("Can't update hosts file with new host data")
5739 if new_node.secondary_ip != new_node.primary_ip:
5740 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5743 node_verify_list = [self.cfg.GetMasterNode()]
5744 node_verify_param = {
5745 constants.NV_NODELIST: ([node], {}),
5746 # TODO: do a node-net-test as well?
5749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5750 self.cfg.GetClusterName())
5751 for verifier in node_verify_list:
5752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 for failed in nl_payload:
5756 feedback_fn("ssh/hostname verification failed"
5757 " (checking from %s): %s" %
5758 (verifier, nl_payload[failed]))
5759 raise errors.OpExecError("ssh/hostname verification failed")
5762 _RedistributeAncillaryFiles(self)
5763 self.context.ReaddNode(new_node)
5764 # make sure we redistribute the config
5765 self.cfg.Update(new_node, feedback_fn)
5766 # and make sure the new node will not have old files around
5767 if not new_node.master_candidate:
5768 result = self.rpc.call_node_demote_from_mc(new_node.name)
5769 msg = result.fail_msg
5771 self.LogWarning("Node failed to demote itself from master"
5772 " candidate status: %s" % msg)
5774 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5775 additional_vm=self.op.vm_capable)
5776 self.context.AddNode(new_node, self.proc.GetECId())
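# Illustrative sketch (helper and field names assumed, not used below): the
# LU above is driven by an OpNodeAdd opcode submitted through the client
# layer, roughly along the lines of:
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com", group="default",
#                          readd=False, master_capable=True, vm_capable=True)
#   cli.SubmitOpCode(op)   # client helper assumed; normally run via "gnt-node add"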
5779 class LUNodeSetParams(LogicalUnit):
5780 """Modifies the parameters of a node.
5782 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5783 to the node role (as _ROLE_*)
5784 @cvar _R2F: a dictionary from node role to tuples of flags
5785 @cvar _FLAGS: a list of attribute names corresponding to the flags
5788 HPATH = "node-modify"
5789 HTYPE = constants.HTYPE_NODE
5791 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793 (True, False, False): _ROLE_CANDIDATE,
5794 (False, True, False): _ROLE_DRAINED,
5795 (False, False, True): _ROLE_OFFLINE,
5796 (False, False, False): _ROLE_REGULAR,
5798 _R2F = dict((v, k) for k, v in _F2R.items())
5799 _FLAGS = ["master_candidate", "drained", "offline"]
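  # Illustrative example of the mappings above (role constants as defined in
  # this class): a node with flags (master_candidate=False, drained=True,
  # offline=False) maps to _ROLE_DRAINED via _F2R, and _R2F yields the same
  # tuple back:
  #
  #   _F2R[(False, True, False)] == _ROLE_DRAINED
  #   _R2F[_ROLE_DRAINED] == (False, True, False)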
5801 def CheckArguments(self):
5802 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5803 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5804 self.op.master_capable, self.op.vm_capable,
5805 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 if all_mods.count(None) == len(all_mods):
5808 raise errors.OpPrereqError("Please pass at least one modification",
5810 if all_mods.count(True) > 1:
5811 raise errors.OpPrereqError("Can't set the node into more than one"
5812 " state at the same time",
5815 # Boolean value that tells us whether we might be demoting from MC
5816 self.might_demote = (self.op.master_candidate == False or
5817 self.op.offline == True or
5818 self.op.drained == True or
5819 self.op.master_capable == False)
5821 if self.op.secondary_ip:
5822 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5823 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5824 " address" % self.op.secondary_ip,
5827 self.lock_all = self.op.auto_promote and self.might_demote
5828 self.lock_instances = self.op.secondary_ip is not None
5830 def _InstanceFilter(self, instance):
5831 """Filter for getting affected instances.
5834 return (instance.disk_template in constants.DTS_INT_MIRROR and
5835 self.op.node_name in instance.all_nodes)
5837 def ExpandNames(self):
5839 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5843 # Since modifying a node can have severe effects on currently running
5844 # operations the resource lock is at least acquired in shared mode
5845 self.needed_locks[locking.LEVEL_NODE_RES] = \
5846 self.needed_locks[locking.LEVEL_NODE]
5848 # Get node resource and instance locks in shared mode; they are not used
5849 # for anything but read-only access
5850 self.share_locks[locking.LEVEL_NODE_RES] = 1
5851 self.share_locks[locking.LEVEL_INSTANCE] = 1
5853 if self.lock_instances:
5854 self.needed_locks[locking.LEVEL_INSTANCE] = \
5855 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5857 def BuildHooksEnv(self):
5860 This runs on the master node.
5864 "OP_TARGET": self.op.node_name,
5865 "MASTER_CANDIDATE": str(self.op.master_candidate),
5866 "OFFLINE": str(self.op.offline),
5867 "DRAINED": str(self.op.drained),
5868 "MASTER_CAPABLE": str(self.op.master_capable),
5869 "VM_CAPABLE": str(self.op.vm_capable),
5872 def BuildHooksNodes(self):
5873 """Build hooks nodes.
5876 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5879 def CheckPrereq(self):
5880 """Check prerequisites.
5882 This checks the requested modifications against the node's and cluster's current state.
5885 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5887 if self.lock_instances:
5888 affected_instances = \
5889 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5891 # Verify instance locks
5892 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5893 wanted_instances = frozenset(affected_instances.keys())
5894 if wanted_instances - owned_instances:
5895 raise errors.OpPrereqError("Instances affected by changing node %s's"
5896 " secondary IP address have changed since"
5897 " locks were acquired, wanted '%s', have"
5898 " '%s'; retry the operation" %
5900 utils.CommaJoin(wanted_instances),
5901 utils.CommaJoin(owned_instances)),
5904 affected_instances = None
5906 if (self.op.master_candidate is not None or
5907 self.op.drained is not None or
5908 self.op.offline is not None):
5909 # we can't change the master's node flags
5910 if self.op.node_name == self.cfg.GetMasterNode():
5911 raise errors.OpPrereqError("The master role can be changed"
5912 " only via master-failover",
5915 if self.op.master_candidate and not node.master_capable:
5916 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5917 " it a master candidate" % node.name,
5920 if self.op.vm_capable == False:
5921 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5923 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5924 " the vm_capable flag" % node.name,
5927 if node.master_candidate and self.might_demote and not self.lock_all:
5928 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5929 # check if, after removing the current node, we would be missing master candidates
5931 (mc_remaining, mc_should, _) = \
5932 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5933 if mc_remaining < mc_should:
5934 raise errors.OpPrereqError("Not enough master candidates, please"
5935 " pass auto promote option to allow"
5936 " promotion (--auto-promote or RAPI"
5937 " auto_promote=True)", errors.ECODE_STATE)
5939 self.old_flags = old_flags = (node.master_candidate,
5940 node.drained, node.offline)
5941 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5942 self.old_role = old_role = self._F2R[old_flags]
5944 # Check for ineffective changes
5945 for attr in self._FLAGS:
5946 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5947 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5948 setattr(self.op, attr, None)
5950 # Past this point, any flag change to False means a transition
5951 # away from the respective state, as only real changes are kept
5953 # TODO: We might query the real power state if it supports OOB
5954 if _SupportsOob(self.cfg, node):
5955 if self.op.offline is False and not (node.powered or
5956 self.op.powered == True):
5957 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5958 " offline status can be reset") %
5960 elif self.op.powered is not None:
5961 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5962 " as it does not support out-of-band"
5963 " handling") % self.op.node_name)
5965 # If we're being de-offlined or un-drained, promote ourselves to master candidate if needed
5966 if (self.op.drained == False or self.op.offline == False or
5967 (self.op.master_capable and not node.master_capable)):
5968 if _DecideSelfPromotion(self):
5969 self.op.master_candidate = True
5970 self.LogInfo("Auto-promoting node to master candidate")
5972 # If we're no longer master capable, we'll demote ourselves from MC
5973 if self.op.master_capable == False and node.master_candidate:
5974 self.LogInfo("Demoting from master candidate")
5975 self.op.master_candidate = False
5978 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5979 if self.op.master_candidate:
5980 new_role = self._ROLE_CANDIDATE
5981 elif self.op.drained:
5982 new_role = self._ROLE_DRAINED
5983 elif self.op.offline:
5984 new_role = self._ROLE_OFFLINE
5985 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5986 # False is still in the new flags, which means we're un-setting (clearing) the current state
5988 new_role = self._ROLE_REGULAR
5989 else: # no new flags, nothing, keep old role
5992 self.new_role = new_role
5994 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5995 # Trying to transition out of offline status
5996 result = self.rpc.call_version([node.name])[node.name]
5998 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5999 " to report its version: %s" %
6000 (node.name, result.fail_msg),
6003 self.LogWarning("Transitioning node from offline to online state"
6004 " without using re-add. Please make sure the node"
6007 if self.op.secondary_ip:
6008 # Ok even without locking, because this can't be changed by any LU
6009 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6010 master_singlehomed = master.secondary_ip == master.primary_ip
6011 if master_singlehomed and self.op.secondary_ip:
6012 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6013 " homed cluster", errors.ECODE_INVAL)
6015 assert not (frozenset(affected_instances) -
6016 self.owned_locks(locking.LEVEL_INSTANCE))
6019 if affected_instances:
6020 raise errors.OpPrereqError("Cannot change secondary IP address:"
6021 " offline node has instances (%s)"
6022 " configured to use it" %
6023 utils.CommaJoin(affected_instances.keys()))
6025 # On online nodes, check that no instances are running, and that
6026 # the node has the new ip and we can reach it.
6027 for instance in affected_instances.values():
6028 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6029 msg="cannot change secondary ip")
6031 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6032 if master.name != node.name:
6033 # check reachability from master secondary ip to new secondary ip
6034 if not netutils.TcpPing(self.op.secondary_ip,
6035 constants.DEFAULT_NODED_PORT,
6036 source=master.secondary_ip):
6037 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6038 " based ping to node daemon port",
6039 errors.ECODE_ENVIRON)
6041 if self.op.ndparams:
6042 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6043 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6044 self.new_ndparams = new_ndparams
6046 if self.op.hv_state:
6047 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6048 self.node.hv_state_static)
6050 if self.op.disk_state:
6051 self.new_disk_state = \
6052 _MergeAndVerifyDiskState(self.op.disk_state,
6053 self.node.disk_state_static)
6055 def Exec(self, feedback_fn):
6060 old_role = self.old_role
6061 new_role = self.new_role
6065 if self.op.ndparams:
6066 node.ndparams = self.new_ndparams
6068 if self.op.powered is not None:
6069 node.powered = self.op.powered
6071 if self.op.hv_state:
6072 node.hv_state_static = self.new_hv_state
6074 if self.op.disk_state:
6075 node.disk_state_static = self.new_disk_state
6077 for attr in ["master_capable", "vm_capable"]:
6078 val = getattr(self.op, attr)
6080 setattr(node, attr, val)
6081 result.append((attr, str(val)))
6083 if new_role != old_role:
6084 # Tell the node to demote itself, if no longer MC and not offline
6085 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6086 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6088 self.LogWarning("Node failed to demote itself: %s", msg)
6090 new_flags = self._R2F[new_role]
6091 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6093 result.append((desc, str(nf)))
6094 (node.master_candidate, node.drained, node.offline) = new_flags
6096 # we locked all nodes, so we adjust the candidate pool (CP) before updating this node
6098 _AdjustCandidatePool(self, [node.name])
6100 if self.op.secondary_ip:
6101 node.secondary_ip = self.op.secondary_ip
6102 result.append(("secondary_ip", self.op.secondary_ip))
6104 # this will trigger configuration file update, if needed
6105 self.cfg.Update(node, feedback_fn)
6107 # this will trigger job queue propagation or cleanup if the master candidate flag changed (i.e. exactly one of the old/new roles is candidate)
6109 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6110 self.context.ReaddNode(node)
6115 class LUNodePowercycle(NoHooksLU):
6116 """Powercycles a node.
6121 def CheckArguments(self):
6122 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6123 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6124 raise errors.OpPrereqError("The node is the master and the force"
6125 " parameter was not set",
6128 def ExpandNames(self):
6129 """Locking for PowercycleNode.
6131 This is a last-resort option and shouldn't block on other
6132 jobs. Therefore, we grab no locks.
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6141 result = self.rpc.call_node_powercycle(self.op.node_name,
6142 self.cfg.GetHypervisorType())
6143 result.Raise("Failed to schedule the reboot")
6144 return result.payload
6147 class LUClusterQuery(NoHooksLU):
6148 """Query cluster configuration.
6153 def ExpandNames(self):
6154 self.needed_locks = {}
6156 def Exec(self, feedback_fn):
6157 """Return cluster config.
6160 cluster = self.cfg.GetClusterInfo()
6163 # Filter just for enabled hypervisors
6164 for os_name, hv_dict in cluster.os_hvp.items():
6165 os_hvp[os_name] = {}
6166 for hv_name, hv_params in hv_dict.items():
6167 if hv_name in cluster.enabled_hypervisors:
6168 os_hvp[os_name][hv_name] = hv_params
6170 # Convert ip_family to ip_version
6171 primary_ip_version = constants.IP4_VERSION
6172 if cluster.primary_ip_family == netutils.IP6Address.family:
6173 primary_ip_version = constants.IP6_VERSION
6176 "software_version": constants.RELEASE_VERSION,
6177 "protocol_version": constants.PROTOCOL_VERSION,
6178 "config_version": constants.CONFIG_VERSION,
6179 "os_api_version": max(constants.OS_API_VERSIONS),
6180 "export_version": constants.EXPORT_VERSION,
6181 "architecture": runtime.GetArchInfo(),
6182 "name": cluster.cluster_name,
6183 "master": cluster.master_node,
6184 "default_hypervisor": cluster.primary_hypervisor,
6185 "enabled_hypervisors": cluster.enabled_hypervisors,
6186 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6187 for hypervisor_name in cluster.enabled_hypervisors]),
6189 "beparams": cluster.beparams,
6190 "osparams": cluster.osparams,
6191 "ipolicy": cluster.ipolicy,
6192 "nicparams": cluster.nicparams,
6193 "ndparams": cluster.ndparams,
6194 "diskparams": cluster.diskparams,
6195 "candidate_pool_size": cluster.candidate_pool_size,
6196 "master_netdev": cluster.master_netdev,
6197 "master_netmask": cluster.master_netmask,
6198 "use_external_mip_script": cluster.use_external_mip_script,
6199 "volume_group_name": cluster.volume_group_name,
6200 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6201 "file_storage_dir": cluster.file_storage_dir,
6202 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6203 "maintain_node_health": cluster.maintain_node_health,
6204 "ctime": cluster.ctime,
6205 "mtime": cluster.mtime,
6206 "uuid": cluster.uuid,
6207 "tags": list(cluster.GetTags()),
6208 "uid_pool": cluster.uid_pool,
6209 "default_iallocator": cluster.default_iallocator,
6210 "reserved_lvs": cluster.reserved_lvs,
6211 "primary_ip_version": primary_ip_version,
6212 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6213 "hidden_os": cluster.hidden_os,
6214 "blacklisted_os": cluster.blacklisted_os,
6220 class LUClusterConfigQuery(NoHooksLU):
6221 """Return configuration values.
6226 def CheckArguments(self):
6227 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6229 def ExpandNames(self):
6230 self.cq.ExpandNames(self)
6232 def DeclareLocks(self, level):
6233 self.cq.DeclareLocks(self, level)
6235 def Exec(self, feedback_fn):
6236 result = self.cq.OldStyleQuery(self)
6238 assert len(result) == 1
6243 class _ClusterQuery(_QueryBase):
6244 FIELDS = query.CLUSTER_FIELDS
6246 #: Do not sort (there is only one item)
6249 def ExpandNames(self, lu):
6250 lu.needed_locks = {}
6252 # The following variables interact with _QueryBase._GetNames
6253 self.wanted = locking.ALL_SET
6254 self.do_locking = self.use_locking
6257 raise errors.OpPrereqError("Can not use locking for cluster queries",
6260 def DeclareLocks(self, lu, level):
6263 def _GetQueryData(self, lu):
6264 """Computes the list of nodes and their attributes.
6267 # Locking is not used
6268 assert not (compat.any(lu.glm.is_owned(level)
6269 for level in locking.LEVELS
6270 if level != locking.LEVEL_CLUSTER) or
6271 self.do_locking or self.use_locking)
6273 if query.CQ_CONFIG in self.requested_data:
6274 cluster = lu.cfg.GetClusterInfo()
6276 cluster = NotImplemented
6278 if query.CQ_QUEUE_DRAINED in self.requested_data:
6279 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6281 drain_flag = NotImplemented
6283 if query.CQ_WATCHER_PAUSE in self.requested_data:
6284 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6286 watcher_pause = NotImplemented
6288 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6291 class LUInstanceActivateDisks(NoHooksLU):
6292 """Bring up an instance's disks.
6297 def ExpandNames(self):
6298 self._ExpandAndLockInstance()
6299 self.needed_locks[locking.LEVEL_NODE] = []
6300 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6302 def DeclareLocks(self, level):
6303 if level == locking.LEVEL_NODE:
6304 self._LockInstancesNodes()
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6315 _CheckNodeOnline(self, self.instance.primary_node)
6317 def Exec(self, feedback_fn):
6318 """Activate the disks.
6321 disks_ok, disks_info = \
6322 _AssembleInstanceDisks(self, self.instance,
6323 ignore_size=self.op.ignore_size)
6325 raise errors.OpExecError("Cannot activate block devices")
6330 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6331 ignore_size=False, check=True):
6332 """Prepare the block devices for an instance.
6334 This sets up the block devices on all nodes.
6336 @type lu: L{LogicalUnit}
6337 @param lu: the logical unit on whose behalf we execute
6338 @type instance: L{objects.Instance}
6339 @param instance: the instance for whose disks we assemble
6340 @type disks: list of L{objects.Disk} or None
6341 @param disks: which disks to assemble (or all, if None)
6342 @type ignore_secondaries: boolean
6343 @param ignore_secondaries: if true, errors on secondary nodes
6344 won't result in an error return from the function
6345 @type ignore_size: boolean
6346 @param ignore_size: if true, the current known size of the disk
6347 will not be used during the disk activation, useful for cases
6348 when the size is wrong
6349 @return: a tuple (disks_ok, device_info); disks_ok is False if the operation failed,
6350     and device_info is a list of (node_name, instance_visible_name, node_device_path)
6351     tuples mapping instance devices to their device paths on the primary node
6356 iname = instance.name
6358 disks = _ExpandCheckDisks(instance, disks)
6360 # With the two-pass mechanism we try to reduce the window of
6361 # opportunity for the race condition of switching DRBD to primary
6362 # before the handshake has occurred, but we do not eliminate it
6364 # The proper fix would be to wait (with some limits) until the
6365 # connection has been made and drbd transitions from WFConnection
6366 # into any other network-connected state (Connected, SyncTarget,
6369 # 1st pass, assemble on all nodes in secondary mode
6370 for idx, inst_disk in enumerate(disks):
6371 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6373 node_disk = node_disk.Copy()
6374 node_disk.UnsetSize()
6375 lu.cfg.SetDiskID(node_disk, node)
6376 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6378 msg = result.fail_msg
6380 is_offline_secondary = (node in instance.secondary_nodes and
6382 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6383 " (is_primary=False, pass=1): %s",
6384 inst_disk.iv_name, node, msg)
6385 if not (ignore_secondaries or is_offline_secondary):
6388 # FIXME: race condition on drbd migration to primary
6390 # 2nd pass, do only the primary node
6391 for idx, inst_disk in enumerate(disks):
6394 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6395 if node != instance.primary_node:
6398 node_disk = node_disk.Copy()
6399 node_disk.UnsetSize()
6400 lu.cfg.SetDiskID(node_disk, node)
6401 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6403 msg = result.fail_msg
6405 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6406 " (is_primary=True, pass=2): %s",
6407 inst_disk.iv_name, node, msg)
6410 dev_path = result.payload
6412 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6414 # leave the disks configured for the primary node
6415 # this is a workaround that would be fixed better by
6416 # improving the logical/physical id handling
6418 lu.cfg.SetDiskID(disk, instance.primary_node)
6420 return disks_ok, device_info
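# Illustrative example of the return value (host and device names assumed):
# for a DRBD instance with a single disk and primary node "node1.example.com",
# a successful call returns something like
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. disks_ok plus one (node, instance-visible name, node device path)
# entry per disk, collected during the primary-node pass above.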
6423 def _StartInstanceDisks(lu, instance, force):
6424 """Start the disks of an instance.
6427 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6428 ignore_secondaries=force)
6430 _ShutdownInstanceDisks(lu, instance)
6431 if force is not None and not force:
6432 lu.proc.LogWarning("", hint="If the message above refers to a"
6434 " you can retry the operation using '--force'.")
6435 raise errors.OpExecError("Disk consistency error")
6438 class LUInstanceDeactivateDisks(NoHooksLU):
6439 """Shutdown an instance's disks.
6444 def ExpandNames(self):
6445 self._ExpandAndLockInstance()
6446 self.needed_locks[locking.LEVEL_NODE] = []
6447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6449 def DeclareLocks(self, level):
6450 if level == locking.LEVEL_NODE:
6451 self._LockInstancesNodes()
6453 def CheckPrereq(self):
6454 """Check prerequisites.
6456 This checks that the instance is in the cluster.
6459 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6460 assert self.instance is not None, \
6461 "Cannot retrieve locked instance %s" % self.op.instance_name
6463 def Exec(self, feedback_fn):
6464 """Deactivate the disks
6467 instance = self.instance
6469 _ShutdownInstanceDisks(self, instance)
6471 _SafeShutdownInstanceDisks(self, instance)
6474 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6475 """Shutdown block devices of an instance.
6477 This function checks that the instance is marked down before calling
6478 _ShutdownInstanceDisks.
6481 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6482 _ShutdownInstanceDisks(lu, instance, disks=disks)
6485 def _ExpandCheckDisks(instance, disks):
6486 """Return the instance disks selected by the disks list
6488 @type disks: list of L{objects.Disk} or None
6489 @param disks: selected disks
6490 @rtype: list of L{objects.Disk}
6491 @return: selected instance disks to act on
6495 return instance.disks
6497 if not set(disks).issubset(instance.disks):
6498 raise errors.ProgrammerError("Can only act on disks belonging to the"
6503 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6504 """Shutdown block devices of an instance.
6506 This does the shutdown on all nodes of the instance.
6508 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
6513 disks = _ExpandCheckDisks(instance, disks)
6516 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6517 lu.cfg.SetDiskID(top_disk, node)
6518 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6519 msg = result.fail_msg
6521 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6522 disk.iv_name, node, msg)
6523 if ((node == instance.primary_node and not ignore_primary) or
6524 (node != instance.primary_node and not result.offline)):
6529 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6530 """Checks if a node has enough free memory.
6532 This function checks if a given node has the needed amount of free
6533 memory. If the node has less memory, or if we cannot obtain the
6534 information from the node, this function raises an OpPrereqError
6537 @type lu: C{LogicalUnit}
6538 @param lu: a logical unit from which we get configuration data
6540 @param node: the node to check
6541 @type reason: C{str}
6542 @param reason: string to use in the error message
6543 @type requested: C{int}
6544 @param requested: the amount of memory in MiB to check for
6545 @type hypervisor_name: C{str}
6546 @param hypervisor_name: the hypervisor to ask for memory stats
6548 @return: node current free memory
6549 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6550 we cannot check the node
6553 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6554 nodeinfo[node].Raise("Can't get data from node %s" % node,
6555 prereq=True, ecode=errors.ECODE_ENVIRON)
6556 (_, _, (hv_info, )) = nodeinfo[node].payload
6558 free_mem = hv_info.get("memory_free", None)
6559 if not isinstance(free_mem, int):
6560 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6561 " was '%s'" % (node, free_mem),
6562 errors.ECODE_ENVIRON)
6563 if requested > free_mem:
6564 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6565 " needed %s MiB, available %s MiB" %
6566 (node, reason, requested, free_mem),
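# Illustrative usage sketch (the 1024 MiB figure is assumed): an LU wanting
# to make sure a node can accommodate another 1024 MiB before starting an
# instance would call
#
#   free = _CheckNodeFreeMemory(self, instance.primary_node,
#                               "starting instance %s" % instance.name,
#                               1024, instance.hypervisor)
#
# and get back the node's current free memory, or an OpPrereqError if the
# check fails; see LUInstanceStartup.CheckPrereq for the real call site.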
6571 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6572 """Checks if nodes have enough free disk space in all the VGs.
6574 This function checks if all given nodes have the needed amount of
6575 free disk. If any node has less disk space, or if we cannot obtain
6576 the information from the node, this function raises an OpPrereqError
6579 @type lu: C{LogicalUnit}
6580 @param lu: a logical unit from which we get configuration data
6581 @type nodenames: C{list}
6582 @param nodenames: the list of node names to check
6583 @type req_sizes: C{dict}
6584 @param req_sizes: the hash of vg and corresponding amount of disk in
6586 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6587 or we cannot check the node
6590 for vg, req_size in req_sizes.items():
6591 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
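# Illustrative example of the req_sizes argument (VG names and sizes
# assumed): requesting 10 GiB in "xenvg" and 512 MiB in "metavg" is passed
# as a dict of MiB values per volume group, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "metavg": 512})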
6594 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6595 """Checks if nodes have enough free disk space in the specified VG.
6597 This function checks if all given nodes have the needed amount of
6598 free disk. If any node has less disk space, or if we cannot obtain
6599 the information from the node, this function raises an OpPrereqError
6602 @type lu: C{LogicalUnit}
6603 @param lu: a logical unit from which we get configuration data
6604 @type nodenames: C{list}
6605 @param nodenames: the list of node names to check
6607 @param vg: the volume group to check
6608 @type requested: C{int}
6609 @param requested: the amount of disk in MiB to check for
6610 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611 or we cannot check the node
6614 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6615 for node in nodenames:
6616 info = nodeinfo[node]
6617 info.Raise("Cannot get current information from node %s" % node,
6618 prereq=True, ecode=errors.ECODE_ENVIRON)
6619 (_, (vg_info, ), _) = info.payload
6620 vg_free = vg_info.get("vg_free", None)
6621 if not isinstance(vg_free, int):
6622 raise errors.OpPrereqError("Can't compute free disk space on node"
6623 " %s for vg %s, result was '%s'" %
6624 (node, vg, vg_free), errors.ECODE_ENVIRON)
6625 if requested > vg_free:
6626 raise errors.OpPrereqError("Not enough disk space on target node %s"
6627 " vg %s: required %d MiB, available %d MiB" %
6628 (node, vg, requested, vg_free),
6632 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6633 """Checks if nodes have enough physical CPUs
6635 This function checks if all given nodes have the needed number of
6636 physical CPUs. If any node has fewer CPUs, or if we cannot obtain the
6637 information from the node, this function raises an OpPrereqError
6640 @type lu: C{LogicalUnit}
6641 @param lu: a logical unit from which we get configuration data
6642 @type nodenames: C{list}
6643 @param nodenames: the list of node names to check
6644 @type requested: C{int}
6645 @param requested: the minimum acceptable number of physical CPUs
6646 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6647 or we cannot check the node
6650 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6651 for node in nodenames:
6652 info = nodeinfo[node]
6653 info.Raise("Cannot get current information from node %s" % node,
6654 prereq=True, ecode=errors.ECODE_ENVIRON)
6655 (_, _, (hv_info, )) = info.payload
6656 num_cpus = hv_info.get("cpu_total", None)
6657 if not isinstance(num_cpus, int):
6658 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6659 " on node %s, result was '%s'" %
6660 (node, num_cpus), errors.ECODE_ENVIRON)
6661 if requested > num_cpus:
6662 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6663 "required" % (node, num_cpus, requested),
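# Illustrative usage sketch (the CPU count is assumed): requiring at least
# four physical CPUs on every candidate node for a given hypervisor would be
#
#   _CheckNodesPhysicalCPUs(self, nodenames, 4, instance.hypervisor)
#
# and any node reporting fewer CPUs (or failing to report) raises an
# OpPrereqError as described above.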
6667 class LUInstanceStartup(LogicalUnit):
6668 """Starts an instance.
6671 HPATH = "instance-start"
6672 HTYPE = constants.HTYPE_INSTANCE
6675 def CheckArguments(self):
6677 if self.op.beparams:
6678 # fill the beparams dict
6679 objects.UpgradeBeParams(self.op.beparams)
6680 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6682 def ExpandNames(self):
6683 self._ExpandAndLockInstance()
6684 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6686 def DeclareLocks(self, level):
6687 if level == locking.LEVEL_NODE_RES:
6688 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6690 def BuildHooksEnv(self):
6693 This runs on master, primary and secondary nodes of the instance.
6697 "FORCE": self.op.force,
6700 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6704 def BuildHooksNodes(self):
6705 """Build hooks nodes.
6708 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6711 def CheckPrereq(self):
6712 """Check prerequisites.
6714 This checks that the instance is in the cluster.
6717 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6718 assert self.instance is not None, \
6719 "Cannot retrieve locked instance %s" % self.op.instance_name
6722 if self.op.hvparams:
6723 # check hypervisor parameter syntax (locally)
6724 cluster = self.cfg.GetClusterInfo()
6725 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6726 filled_hvp = cluster.FillHV(instance)
6727 filled_hvp.update(self.op.hvparams)
6728 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6729 hv_type.CheckParameterSyntax(filled_hvp)
6730 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6732 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6734 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6736 if self.primary_offline and self.op.ignore_offline_nodes:
6737 self.proc.LogWarning("Ignoring offline primary node")
6739 if self.op.hvparams or self.op.beparams:
6740 self.proc.LogWarning("Overridden parameters are ignored")
6742 _CheckNodeOnline(self, instance.primary_node)
6744 bep = self.cfg.GetClusterInfo().FillBE(instance)
6745 bep.update(self.op.beparams)
6747 # check bridges existence
6748 _CheckInstanceBridgesExist(self, instance)
6750 remote_info = self.rpc.call_instance_info(instance.primary_node,
6752 instance.hypervisor)
6753 remote_info.Raise("Error checking node %s" % instance.primary_node,
6754 prereq=True, ecode=errors.ECODE_ENVIRON)
6755 if not remote_info.payload: # not running already
6756 _CheckNodeFreeMemory(self, instance.primary_node,
6757 "starting instance %s" % instance.name,
6758 bep[constants.BE_MINMEM], instance.hypervisor)
6760 def Exec(self, feedback_fn):
6761 """Start the instance.
6764 instance = self.instance
6765 force = self.op.force
6767 if not self.op.no_remember:
6768 self.cfg.MarkInstanceUp(instance.name)
6770 if self.primary_offline:
6771 assert self.op.ignore_offline_nodes
6772 self.proc.LogInfo("Primary node offline, marked instance as started")
6774 node_current = instance.primary_node
6776 _StartInstanceDisks(self, instance, force)
6779 self.rpc.call_instance_start(node_current,
6780 (instance, self.op.hvparams,
6782 self.op.startup_paused)
6783 msg = result.fail_msg
6785 _ShutdownInstanceDisks(self, instance)
6786 raise errors.OpExecError("Could not start instance: %s" % msg)
6789 class LUInstanceReboot(LogicalUnit):
6790 """Reboot an instance.
6793 HPATH = "instance-reboot"
6794 HTYPE = constants.HTYPE_INSTANCE
6797 def ExpandNames(self):
6798 self._ExpandAndLockInstance()
6800 def BuildHooksEnv(self):
6803 This runs on master, primary and secondary nodes of the instance.
6807 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6808 "REBOOT_TYPE": self.op.reboot_type,
6809 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6812 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6816 def BuildHooksNodes(self):
6817 """Build hooks nodes.
6820 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6823 def CheckPrereq(self):
6824 """Check prerequisites.
6826 This checks that the instance is in the cluster.
6829 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6830 assert self.instance is not None, \
6831 "Cannot retrieve locked instance %s" % self.op.instance_name
6832 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6833 _CheckNodeOnline(self, instance.primary_node)
6835 # check bridges existence
6836 _CheckInstanceBridgesExist(self, instance)
6838 def Exec(self, feedback_fn):
6839 """Reboot the instance.
6842 instance = self.instance
6843 ignore_secondaries = self.op.ignore_secondaries
6844 reboot_type = self.op.reboot_type
6846 remote_info = self.rpc.call_instance_info(instance.primary_node,
6848 instance.hypervisor)
6849 remote_info.Raise("Error checking node %s" % instance.primary_node)
6850 instance_running = bool(remote_info.payload)
6852 node_current = instance.primary_node
6854 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6855 constants.INSTANCE_REBOOT_HARD]:
6856 for disk in instance.disks:
6857 self.cfg.SetDiskID(disk, node_current)
6858 result = self.rpc.call_instance_reboot(node_current, instance,
6860 self.op.shutdown_timeout)
6861 result.Raise("Could not reboot instance")
6863 if instance_running:
6864 result = self.rpc.call_instance_shutdown(node_current, instance,
6865 self.op.shutdown_timeout)
6866 result.Raise("Could not shutdown instance for full reboot")
6867 _ShutdownInstanceDisks(self, instance)
6869 self.LogInfo("Instance %s was already stopped, starting now",
6871 _StartInstanceDisks(self, instance, ignore_secondaries)
6872 result = self.rpc.call_instance_start(node_current,
6873 (instance, None, None), False)
6874 msg = result.fail_msg
6876 _ShutdownInstanceDisks(self, instance)
6877 raise errors.OpExecError("Could not start instance for"
6878 " full reboot: %s" % msg)
6880 self.cfg.MarkInstanceUp(instance.name)
6883 class LUInstanceShutdown(LogicalUnit):
6884 """Shutdown an instance.
6887 HPATH = "instance-stop"
6888 HTYPE = constants.HTYPE_INSTANCE
6891 def ExpandNames(self):
6892 self._ExpandAndLockInstance()
6894 def BuildHooksEnv(self):
6897 This runs on master, primary and secondary nodes of the instance.
6900 env = _BuildInstanceHookEnvByObject(self, self.instance)
6901 env["TIMEOUT"] = self.op.timeout
6904 def BuildHooksNodes(self):
6905 """Build hooks nodes.
6908 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6911 def CheckPrereq(self):
6912 """Check prerequisites.
6914 This checks that the instance is in the cluster.
6917 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6918 assert self.instance is not None, \
6919 "Cannot retrieve locked instance %s" % self.op.instance_name
6921 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6923 self.primary_offline = \
6924 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6926 if self.primary_offline and self.op.ignore_offline_nodes:
6927 self.proc.LogWarning("Ignoring offline primary node")
6929 _CheckNodeOnline(self, self.instance.primary_node)
6931 def Exec(self, feedback_fn):
6932 """Shutdown the instance.
6935 instance = self.instance
6936 node_current = instance.primary_node
6937 timeout = self.op.timeout
6939 if not self.op.no_remember:
6940 self.cfg.MarkInstanceDown(instance.name)
6942 if self.primary_offline:
6943 assert self.op.ignore_offline_nodes
6944 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6946 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6947 msg = result.fail_msg
6949 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6951 _ShutdownInstanceDisks(self, instance)
6954 class LUInstanceReinstall(LogicalUnit):
6955 """Reinstall an instance.
6958 HPATH = "instance-reinstall"
6959 HTYPE = constants.HTYPE_INSTANCE
6962 def ExpandNames(self):
6963 self._ExpandAndLockInstance()
6965 def BuildHooksEnv(self):
6968 This runs on master, primary and secondary nodes of the instance.
6971 return _BuildInstanceHookEnvByObject(self, self.instance)
6973 def BuildHooksNodes(self):
6974 """Build hooks nodes.
6977 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6980 def CheckPrereq(self):
6981 """Check prerequisites.
6983 This checks that the instance is in the cluster and is not running.
6986 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6987 assert instance is not None, \
6988 "Cannot retrieve locked instance %s" % self.op.instance_name
6989 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6990 " offline, cannot reinstall")
6992 if instance.disk_template == constants.DT_DISKLESS:
6993 raise errors.OpPrereqError("Instance '%s' has no disks" %
6994 self.op.instance_name,
6996 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6998 if self.op.os_type is not None:
7000 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7001 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7002 instance_os = self.op.os_type
7004 instance_os = instance.os
7006 nodelist = list(instance.all_nodes)
7008 if self.op.osparams:
7009 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7010 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7011 self.os_inst = i_osdict # the new dict (without defaults)
7015 self.instance = instance
7017 def Exec(self, feedback_fn):
7018 """Reinstall the instance.
7021 inst = self.instance
7023 if self.op.os_type is not None:
7024 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7025 inst.os = self.op.os_type
7026 # Write to configuration
7027 self.cfg.Update(inst, feedback_fn)
7029 _StartInstanceDisks(self, inst, None)
7031 feedback_fn("Running the instance OS create scripts...")
7032 # FIXME: pass debug option from opcode to backend
7033 result = self.rpc.call_instance_os_add(inst.primary_node,
7034 (inst, self.os_inst), True,
7035 self.op.debug_level)
7036 result.Raise("Could not install OS for instance %s on node %s" %
7037 (inst.name, inst.primary_node))
7039 _ShutdownInstanceDisks(self, inst)
7042 class LUInstanceRecreateDisks(LogicalUnit):
7043 """Recreate an instance's missing disks.
7046 HPATH = "instance-recreate-disks"
7047 HTYPE = constants.HTYPE_INSTANCE
7050 _MODIFYABLE = frozenset([
7051 constants.IDISK_SIZE,
7052 constants.IDISK_MODE,
7055 # New or changed disk parameters may have different semantics
7056 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7057 constants.IDISK_ADOPT,
7059 # TODO: Implement support for changing the VG while recreating
7061 constants.IDISK_METAVG,
7064 def CheckArguments(self):
7065 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7066 # Normalize and convert deprecated list of disk indices
7067 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
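    # Illustrative example of the normalization above (input assumed): the
    # deprecated index list [2, 0] becomes [(0, {}), (2, {})], i.e. sorted,
    # de-duplicated (index, parameter-overrides) pairs with empty overrides.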
7069 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7071 raise errors.OpPrereqError("Some disks have been specified more than"
7072 " once: %s" % utils.CommaJoin(duplicates),
7075 for (idx, params) in self.op.disks:
7076 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7077 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7079 raise errors.OpPrereqError("Parameters for disk %s try to change"
7080 " unmodifiable parameter(s): %s" %
7081 (idx, utils.CommaJoin(unsupported)),
7084 def ExpandNames(self):
7085 self._ExpandAndLockInstance()
7086 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7088 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7089 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7091 self.needed_locks[locking.LEVEL_NODE] = []
7092 self.needed_locks[locking.LEVEL_NODE_RES] = []
7094 def DeclareLocks(self, level):
7095 if level == locking.LEVEL_NODE:
7096 # if we replace the nodes, we only need to lock the old primary,
7097 # otherwise we need to lock all nodes for disk re-creation
7098 primary_only = bool(self.op.nodes)
7099 self._LockInstancesNodes(primary_only=primary_only)
7100 elif level == locking.LEVEL_NODE_RES:
7102 self.needed_locks[locking.LEVEL_NODE_RES] = \
7103 self.needed_locks[locking.LEVEL_NODE][:]
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 return _BuildInstanceHookEnvByObject(self, self.instance)
7113 def BuildHooksNodes(self):
7114 """Build hooks nodes.
7117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7120 def CheckPrereq(self):
7121 """Check prerequisites.
7123 This checks that the instance is in the cluster and is not running.
7126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7127 assert instance is not None, \
7128 "Cannot retrieve locked instance %s" % self.op.instance_name
7130 if len(self.op.nodes) != len(instance.all_nodes):
7131 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7132 " %d replacement nodes were specified" %
7133 (instance.name, len(instance.all_nodes),
7134 len(self.op.nodes)),
7136 assert instance.disk_template != constants.DT_DRBD8 or \
7137 len(self.op.nodes) == 2
7138 assert instance.disk_template != constants.DT_PLAIN or \
7139 len(self.op.nodes) == 1
7140 primary_node = self.op.nodes[0]
7142 primary_node = instance.primary_node
7143 _CheckNodeOnline(self, primary_node)
7145 if instance.disk_template == constants.DT_DISKLESS:
7146 raise errors.OpPrereqError("Instance '%s' has no disks" %
7147 self.op.instance_name, errors.ECODE_INVAL)
7149 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7151 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7152 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7153 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7154 if not (self.op.nodes and old_pnode.offline):
7155 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7156 msg="cannot recreate disks")
7159 self.disks = dict(self.op.disks)
7161 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7163 maxidx = max(self.disks.keys())
7164 if maxidx >= len(instance.disks):
7165 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7168 if (self.op.nodes and
7169 sorted(self.disks.keys()) != range(len(instance.disks))):
7170 raise errors.OpPrereqError("Can't recreate disks partially and"
7171 " change the nodes at the same time",
7174 self.instance = instance
7176 def Exec(self, feedback_fn):
7177 """Recreate the disks.
7180 instance = self.instance
7182 assert (self.owned_locks(locking.LEVEL_NODE) ==
7183 self.owned_locks(locking.LEVEL_NODE_RES))
7186 mods = [] # keeps track of needed changes
7188 for idx, disk in enumerate(instance.disks):
7190 changes = self.disks[idx]
7192 # Disk should not be recreated
7196 # update secondaries for disks, if needed
7197 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7198 # need to update the nodes and minors
7199 assert len(self.op.nodes) == 2
7200 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7202 (_, _, old_port, _, _, old_secret) = disk.logical_id
7203 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7204 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7205 new_minors[0], new_minors[1], old_secret)
7206 assert len(disk.logical_id) == len(new_id)
7210 mods.append((idx, new_id, changes))
7212 # now that we have passed all asserts above, we can apply the mods
7213 # in a single run (to avoid partial changes)
7214 for idx, new_id, changes in mods:
7215 disk = instance.disks[idx]
7216 if new_id is not None:
7217 assert disk.dev_type == constants.LD_DRBD8
7218 disk.logical_id = new_id
7220 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7221 mode=changes.get(constants.IDISK_MODE, None))
7223 # change primary node, if needed
7225 instance.primary_node = self.op.nodes[0]
7226 self.LogWarning("Changing the instance's nodes, you will have to"
7227 " remove any disks left on the older nodes manually")
7230 self.cfg.Update(instance, feedback_fn)
7232 _CreateDisks(self, instance, to_skip=to_skip)
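# Illustrative example of the "mods" list built in Exec above (node names,
# port, minors and secret are assumed): recreating a DRBD disk on two new
# nodes could produce an entry such as
#
#   (0, ("node3.example.com", "node4.example.com", 11000, 2, 3, "secret"),
#    {"size": 10240})
#
# i.e. (disk index, new DRBD logical_id, requested changes), applied in a
# single pass afterwards to avoid leaving the configuration half-updated.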
7235 class LUInstanceRename(LogicalUnit):
7236 """Rename an instance.
7239 HPATH = "instance-rename"
7240 HTYPE = constants.HTYPE_INSTANCE
7242 def CheckArguments(self):
7246 if self.op.ip_check and not self.op.name_check:
7247 # TODO: make the ip check more flexible and not depend on the name check
7248 raise errors.OpPrereqError("IP address check requires a name check",
7251 def BuildHooksEnv(self):
7254 This runs on master, primary and secondary nodes of the instance.
7257 env = _BuildInstanceHookEnvByObject(self, self.instance)
7258 env["INSTANCE_NEW_NAME"] = self.op.new_name
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster and is not running.
7274 self.op.instance_name = _ExpandInstanceName(self.cfg,
7275 self.op.instance_name)
7276 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7277 assert instance is not None
7278 _CheckNodeOnline(self, instance.primary_node)
7279 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7280 msg="cannot rename")
7281 self.instance = instance
7283 new_name = self.op.new_name
7284 if self.op.name_check:
7285 hostname = netutils.GetHostname(name=new_name)
7286 if hostname.name != new_name:
7287 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7289 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7290 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7291 " same as given hostname '%s'") %
7292 (hostname.name, self.op.new_name),
7294 new_name = self.op.new_name = hostname.name
7295 if (self.op.ip_check and
7296 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7297 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7298 (hostname.ip, new_name),
7299 errors.ECODE_NOTUNIQUE)
7301 instance_list = self.cfg.GetInstanceList()
7302 if new_name in instance_list and new_name != instance.name:
7303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7304 new_name, errors.ECODE_EXISTS)
7306 def Exec(self, feedback_fn):
7307 """Rename the instance.
7310 inst = self.instance
7311 old_name = inst.name
7313 rename_file_storage = False
7314 if (inst.disk_template in constants.DTS_FILEBASED and
7315 self.op.new_name != inst.name):
7316 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317 rename_file_storage = True
7319 self.cfg.RenameInstance(inst.name, self.op.new_name)
7320 # Change the instance lock. This is definitely safe while we hold the BGL.
7321 # Otherwise the new lock would have to be added in acquired mode.
7323 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7324 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7326 # re-read the instance from the configuration after rename
7327 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7329 if rename_file_storage:
7330 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7331 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7332 old_file_storage_dir,
7333 new_file_storage_dir)
7334 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7335 " (but the instance has been renamed in Ganeti)" %
7336 (inst.primary_node, old_file_storage_dir,
7337 new_file_storage_dir))
7339 _StartInstanceDisks(self, inst, None)
7341 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7342 old_name, self.op.debug_level)
7343 msg = result.fail_msg
7345 msg = ("Could not run OS rename script for instance %s on node %s"
7346 " (but the instance has been renamed in Ganeti): %s" %
7347 (inst.name, inst.primary_node, msg))
7348 self.proc.LogWarning(msg)
7350 _ShutdownInstanceDisks(self, inst)
7355 class LUInstanceRemove(LogicalUnit):
7356 """Remove an instance.
7359 HPATH = "instance-remove"
7360 HTYPE = constants.HTYPE_INSTANCE
7363 def ExpandNames(self):
7364 self._ExpandAndLockInstance()
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7369 def DeclareLocks(self, level):
7370 if level == locking.LEVEL_NODE:
7371 self._LockInstancesNodes()
7372 elif level == locking.LEVEL_NODE_RES:
7374 self.needed_locks[locking.LEVEL_NODE_RES] = \
7375 self.needed_locks[locking.LEVEL_NODE][:]
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 env = _BuildInstanceHookEnvByObject(self, self.instance)
7384 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7387 def BuildHooksNodes(self):
7388 """Build hooks nodes.
7391 nl = [self.cfg.GetMasterNode()]
7392 nl_post = list(self.instance.all_nodes) + nl
7393 return (nl, nl_post)
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7405 def Exec(self, feedback_fn):
7406 """Remove the instance.
7409 instance = self.instance
7410 logging.info("Shutting down instance %s on node %s",
7411 instance.name, instance.primary_node)
7413 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7414 self.op.shutdown_timeout)
7415 msg = result.fail_msg
7417 if self.op.ignore_failures:
7418 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7420 raise errors.OpExecError("Could not shutdown instance %s on"
7422 (instance.name, instance.primary_node, msg))
7424 assert (self.owned_locks(locking.LEVEL_NODE) ==
7425 self.owned_locks(locking.LEVEL_NODE_RES))
7426 assert not (set(instance.all_nodes) -
7427 self.owned_locks(locking.LEVEL_NODE)), \
7428 "Not owning correct locks"
7430 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7433 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7434 """Utility function to remove an instance.
7437 logging.info("Removing block devices for instance %s", instance.name)
7439 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7440 if not ignore_failures:
7441 raise errors.OpExecError("Can't remove instance's disks")
7442 feedback_fn("Warning: can't remove instance's disks")
7444 logging.info("Removing instance %s out of cluster config", instance.name)
7446 lu.cfg.RemoveInstance(instance.name)
7448 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7449 "Instance lock removal conflict"
7451 # Remove lock for the instance
7452 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7455 class LUInstanceQuery(NoHooksLU):
7456 """Logical unit for querying instances.
7459 # pylint: disable=W0142
7462 def CheckArguments(self):
7463 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7464 self.op.output_fields, self.op.use_locking)
7466 def ExpandNames(self):
7467 self.iq.ExpandNames(self)
7469 def DeclareLocks(self, level):
7470 self.iq.DeclareLocks(self, level)
7472 def Exec(self, feedback_fn):
7473 return self.iq.OldStyleQuery(self)
7476 class LUInstanceFailover(LogicalUnit):
7477 """Failover an instance.
7480 HPATH = "instance-failover"
7481 HTYPE = constants.HTYPE_INSTANCE
7484 def CheckArguments(self):
7485 """Check the arguments.
7488 self.iallocator = getattr(self.op, "iallocator", None)
7489 self.target_node = getattr(self.op, "target_node", None)
7491 def ExpandNames(self):
7492 self._ExpandAndLockInstance()
7494 if self.op.target_node is not None:
7495 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.needed_locks[locking.LEVEL_NODE] = []
7498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7500 self.needed_locks[locking.LEVEL_NODE_RES] = []
7501 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7503 ignore_consistency = self.op.ignore_consistency
7504 shutdown_timeout = self.op.shutdown_timeout
7505 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7508 ignore_consistency=ignore_consistency,
7509 shutdown_timeout=shutdown_timeout,
7510 ignore_ipolicy=self.op.ignore_ipolicy)
7511 self.tasklets = [self._migrater]
7513 def DeclareLocks(self, level):
7514 if level == locking.LEVEL_NODE:
7515 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7516 if instance.disk_template in constants.DTS_EXT_MIRROR:
7517 if self.op.target_node is None:
7518 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7520 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7521 self.op.target_node]
7522 del self.recalculate_locks[locking.LEVEL_NODE]
7524 self._LockInstancesNodes()
7525 elif level == locking.LEVEL_NODE_RES:
7527 self.needed_locks[locking.LEVEL_NODE_RES] = \
7528 self.needed_locks[locking.LEVEL_NODE][:]
7530 def BuildHooksEnv(self):
7533 This runs on master, primary and secondary nodes of the instance.
7536 instance = self._migrater.instance
7537 source_node = instance.primary_node
7538 target_node = self.op.target_node
7540 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7541 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7542 "OLD_PRIMARY": source_node,
7543 "NEW_PRIMARY": target_node,
7546 if instance.disk_template in constants.DTS_INT_MIRROR:
7547 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7548 env["NEW_SECONDARY"] = source_node
7550 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7552 env.update(_BuildInstanceHookEnvByObject(self, instance))
7556 def BuildHooksNodes(self):
7557 """Build hooks nodes.
7560 instance = self._migrater.instance
7561 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7562 return (nl, nl + [instance.primary_node])
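# A minimal sketch of how a caller might build the matching opcode; the field
# names mirror the op attributes used above (ignore_consistency,
# shutdown_timeout, target_node, iallocator, ignore_ipolicy) and the values
# are purely illustrative:
#
#   op = opcodes.OpInstanceFailover(instance_name="inst1.example.com",
#                                   ignore_consistency=False,
#                                   shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT)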
7565 class LUInstanceMigrate(LogicalUnit):
7566 """Migrate an instance.
7568 This is migration without shutting down, compared to the failover,
7569 which is done with shutdown.
7572 HPATH = "instance-migrate"
7573 HTYPE = constants.HTYPE_INSTANCE
7576 def ExpandNames(self):
7577 self._ExpandAndLockInstance()
7579 if self.op.target_node is not None:
7580 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7582 self.needed_locks[locking.LEVEL_NODE] = []
7583 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7585 self.needed_locks[locking.LEVEL_NODE] = []
7586 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7589 TLMigrateInstance(self, self.op.instance_name,
7590 cleanup=self.op.cleanup,
7592 fallback=self.op.allow_failover,
7593 allow_runtime_changes=self.op.allow_runtime_changes,
7594 ignore_ipolicy=self.op.ignore_ipolicy)
7595 self.tasklets = [self._migrater]
7597 def DeclareLocks(self, level):
7598 if level == locking.LEVEL_NODE:
7599 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7600 if instance.disk_template in constants.DTS_EXT_MIRROR:
7601 if self.op.target_node is None:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7605 self.op.target_node]
7606 del self.recalculate_locks[locking.LEVEL_NODE]
7608 self._LockInstancesNodes()
7609 elif level == locking.LEVEL_NODE_RES:
7611 self.needed_locks[locking.LEVEL_NODE_RES] = \
7612 self.needed_locks[locking.LEVEL_NODE][:]
7614 def BuildHooksEnv(self):
7617 This runs on master, primary and secondary nodes of the instance.
7620 instance = self._migrater.instance
7621 source_node = instance.primary_node
7622 target_node = self.op.target_node
7623 env = _BuildInstanceHookEnvByObject(self, instance)
7625 "MIGRATE_LIVE": self._migrater.live,
7626 "MIGRATE_CLEANUP": self.op.cleanup,
7627 "OLD_PRIMARY": source_node,
7628 "NEW_PRIMARY": target_node,
7629 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7632 if instance.disk_template in constants.DTS_INT_MIRROR:
7633 env["OLD_SECONDARY"] = target_node
7634 env["NEW_SECONDARY"] = source_node
7636 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7640 def BuildHooksNodes(self):
7641 """Build hooks nodes.
7644 instance = self._migrater.instance
7645 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7646 return (nl, nl + [instance.primary_node])
7649 class LUInstanceMove(LogicalUnit):
7650 """Move an instance by data-copying.
7653 HPATH = "instance-move"
7654 HTYPE = constants.HTYPE_INSTANCE
7657 def ExpandNames(self):
7658 self._ExpandAndLockInstance()
7659 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7660 self.op.target_node = target_node
7661 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7662 self.needed_locks[locking.LEVEL_NODE_RES] = []
7663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7665 def DeclareLocks(self, level):
7666 if level == locking.LEVEL_NODE:
7667 self._LockInstancesNodes(primary_only=True)
7668 elif level == locking.LEVEL_NODE_RES:
7670 self.needed_locks[locking.LEVEL_NODE_RES] = \
7671 self.needed_locks[locking.LEVEL_NODE][:]
7673 def BuildHooksEnv(self):
7676 This runs on master, primary and secondary nodes of the instance.
7680 "TARGET_NODE": self.op.target_node,
7681 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7686 def BuildHooksNodes(self):
7687 """Build hooks nodes.
7691 self.cfg.GetMasterNode(),
7692 self.instance.primary_node,
7693 self.op.target_node,
7697 def CheckPrereq(self):
7698 """Check prerequisites.
7700 This checks that the instance is in the cluster.
7703 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7704 assert self.instance is not None, \
7705 "Cannot retrieve locked instance %s" % self.op.instance_name
7707 node = self.cfg.GetNodeInfo(self.op.target_node)
7708 assert node is not None, \
7709 "Cannot retrieve locked node %s" % self.op.target_node
7711 self.target_node = target_node = node.name
7713 if target_node == instance.primary_node:
7714 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7715 (instance.name, target_node),
7718 bep = self.cfg.GetClusterInfo().FillBE(instance)
7720 for idx, dsk in enumerate(instance.disks):
7721 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7722 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7723 " cannot copy" % idx, errors.ECODE_STATE)
7725 _CheckNodeOnline(self, target_node)
7726 _CheckNodeNotDrained(self, target_node)
7727 _CheckNodeVmCapable(self, target_node)
7728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7729 self.cfg.GetNodeGroup(node.group))
7730 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7731 ignore=self.op.ignore_ipolicy)
7733 if instance.admin_state == constants.ADMINST_UP:
7734 # check memory requirements on the secondary node
7735 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7736 instance.name, bep[constants.BE_MAXMEM],
7737 instance.hypervisor)
7739 self.LogInfo("Not checking memory on the secondary node as"
7740 " instance will not be started")
7742 # check bridge existence
7743 _CheckInstanceBridgesExist(self, instance, node=target_node)
7745 def Exec(self, feedback_fn):
7746 """Move an instance.
7748 The move is done by shutting it down on its present node, copying
7749 the data over (slow) and starting it on the new node.
7752 instance = self.instance
7754 source_node = instance.primary_node
7755 target_node = self.target_node
7757 self.LogInfo("Shutting down instance %s on source node %s",
7758 instance.name, source_node)
7760 assert (self.owned_locks(locking.LEVEL_NODE) ==
7761 self.owned_locks(locking.LEVEL_NODE_RES))
7763 result = self.rpc.call_instance_shutdown(source_node, instance,
7764 self.op.shutdown_timeout)
7765 msg = result.fail_msg
7766 if msg:
7767 if self.op.ignore_consistency:
7768 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7769 " Proceeding anyway. Please make sure node"
7770 " %s is down. Error details: %s",
7771 instance.name, source_node, source_node, msg)
7772 else:
7773 raise errors.OpExecError("Could not shutdown instance %s on"
7774 " node %s: %s" %
7775 (instance.name, source_node, msg))
7777 # create the target disks
7779 _CreateDisks(self, instance, target_node=target_node)
7780 except errors.OpExecError:
7781 self.LogWarning("Device creation failed, reverting...")
7783 _RemoveDisks(self, instance, target_node=target_node)
7785 self.cfg.ReleaseDRBDMinors(instance.name)
7788 cluster_name = self.cfg.GetClusterInfo().cluster_name
7791 # activate, get path, copy the data over
7792 for idx, disk in enumerate(instance.disks):
7793 self.LogInfo("Copying data for disk %d", idx)
7794 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7795 instance.name, True, idx)
7797 self.LogWarning("Can't assemble newly created disk %d: %s",
7798 idx, result.fail_msg)
7799 errs.append(result.fail_msg)
7801 dev_path = result.payload
7802 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7803 target_node, dev_path,
7806 self.LogWarning("Can't copy data over for disk %d: %s",
7807 idx, result.fail_msg)
7808 errs.append(result.fail_msg)
7812 self.LogWarning("Some disks failed to copy, aborting")
7814 _RemoveDisks(self, instance, target_node=target_node)
7816 self.cfg.ReleaseDRBDMinors(instance.name)
7817 raise errors.OpExecError("Errors during disk copy: %s" %
7820 instance.primary_node = target_node
7821 self.cfg.Update(instance, feedback_fn)
7823 self.LogInfo("Removing the disks on the original node")
7824 _RemoveDisks(self, instance, target_node=source_node)
7826 # Only start the instance if it's marked as up
7827 if instance.admin_state == constants.ADMINST_UP:
7828 self.LogInfo("Starting instance %s on node %s",
7829 instance.name, target_node)
7831 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7832 ignore_secondaries=True)
7834 _ShutdownInstanceDisks(self, instance)
7835 raise errors.OpExecError("Can't activate the instance's disks")
7837 result = self.rpc.call_instance_start(target_node,
7838 (instance, None, None), False)
7839 msg = result.fail_msg
7841 _ShutdownInstanceDisks(self, instance)
7842 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7843 (instance.name, target_node, msg))
7846 class LUNodeMigrate(LogicalUnit):
7847 """Migrate all instances from a node.
7850 HPATH = "node-migrate"
7851 HTYPE = constants.HTYPE_NODE
7854 def CheckArguments(self):
7857 def ExpandNames(self):
7858 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7860 self.share_locks = _ShareAll()
7861 self.needed_locks = {
7862 locking.LEVEL_NODE: [self.op.node_name],
7865 def BuildHooksEnv(self):
7868 This runs on the master, the primary and all the secondaries.
7872 "NODE_NAME": self.op.node_name,
7873 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7876 def BuildHooksNodes(self):
7877 """Build hooks nodes.
7880 nl = [self.cfg.GetMasterNode()]
7883 def CheckPrereq(self):
7886 def Exec(self, feedback_fn):
7887 # Prepare jobs for migration instances
7888 allow_runtime_changes = self.op.allow_runtime_changes
7890 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7893 iallocator=self.op.iallocator,
7894 target_node=self.op.target_node,
7895 allow_runtime_changes=allow_runtime_changes,
7896 ignore_ipolicy=self.op.ignore_ipolicy)]
7897 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7900 # TODO: Run iallocator in this opcode and pass correct placement options to
7901 # OpInstanceMigrate. Since other jobs can modify the cluster between
7902 # running the iallocator and the actual migration, a good consistency model
7903 # will have to be found.
7905 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7906 frozenset([self.op.node_name]))
7908 return ResultWithJobs(jobs)
7911 class TLMigrateInstance(Tasklet):
7912 """Tasklet class for instance migration.
7915 @ivar live: whether the migration will be done live or non-live;
7916 this variable is initialized only after CheckPrereq has run
7917 @type cleanup: boolean
7918 @ivar cleanup: Whether we are cleaning up after a failed migration
7919 @type iallocator: string
7920 @ivar iallocator: The iallocator used to determine target_node
7921 @type target_node: string
7922 @ivar target_node: If given, the target_node to reallocate the instance to
7923 @type failover: boolean
7924 @ivar failover: Whether operation results in failover or migration
7925 @type fallback: boolean
7926 @ivar fallback: Whether fallback to failover is allowed if migration not
7928 @type ignore_consistency: boolean
7929 @ivar ignore_consistency: Whether we should ignore consistency between source
7931 @type shutdown_timeout: int
7932 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7933 @type ignore_ipolicy: bool
7934 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7939 _MIGRATION_POLL_INTERVAL = 1 # seconds
7940 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
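# TLMigrateInstance is never used as an LU on its own: LUInstanceFailover and
# LUInstanceMigrate above instantiate it and register it via self.tasklets, so
# its CheckPrereq and Exec are run on their behalf.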
7942 def __init__(self, lu, instance_name, cleanup=False,
7943 failover=False, fallback=False,
7944 ignore_consistency=False,
7945 allow_runtime_changes=True,
7946 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7947 ignore_ipolicy=False):
7948 """Initializes this class.
7951 Tasklet.__init__(self, lu)
7954 self.instance_name = instance_name
7955 self.cleanup = cleanup
7956 self.live = False # will be overridden later
7957 self.failover = failover
7958 self.fallback = fallback
7959 self.ignore_consistency = ignore_consistency
7960 self.shutdown_timeout = shutdown_timeout
7961 self.ignore_ipolicy = ignore_ipolicy
7962 self.allow_runtime_changes = allow_runtime_changes
7964 def CheckPrereq(self):
7965 """Check prerequisites.
7967 This checks that the instance is in the cluster.
7970 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7971 instance = self.cfg.GetInstanceInfo(instance_name)
7972 assert instance is not None
7973 self.instance = instance
7974 cluster = self.cfg.GetClusterInfo()
7976 if (not self.cleanup and
7977 not instance.admin_state == constants.ADMINST_UP and
7978 not self.failover and self.fallback):
7979 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7980 " switching to failover")
7981 self.failover = True
7983 if instance.disk_template not in constants.DTS_MIRRORED:
7984 if self.failover:
7985 text = "failover"
7986 else:
7987 text = "migration"
7988 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7989 " %s" % (instance.disk_template, text),
7992 if instance.disk_template in constants.DTS_EXT_MIRROR:
7993 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7995 if self.lu.op.iallocator:
7996 self._RunAllocator()
7998 # We set self.target_node, as it is required by
8000 self.target_node = self.lu.op.target_node
8002 # Check that the target node is correct in terms of instance policy
8003 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8004 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8005 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8006 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8007 ignore=self.ignore_ipolicy)
8009 # self.target_node is already populated, either directly or by the
8011 target_node = self.target_node
8012 if self.target_node == instance.primary_node:
8013 raise errors.OpPrereqError("Cannot migrate instance %s"
8014 " to its primary (%s)" %
8015 (instance.name, instance.primary_node))
8017 if len(self.lu.tasklets) == 1:
8018 # It is safe to release locks only when we're the only tasklet
8020 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8021 keep=[instance.primary_node, self.target_node])
8024 secondary_nodes = instance.secondary_nodes
8025 if not secondary_nodes:
8026 raise errors.ConfigurationError("No secondary node but using"
8027 " %s disk template" %
8028 instance.disk_template)
8029 target_node = secondary_nodes[0]
8030 if self.lu.op.iallocator or (self.lu.op.target_node and
8031 self.lu.op.target_node != target_node):
8032 if self.failover:
8033 text = "failed over"
8034 else:
8035 text = "migrated"
8036 raise errors.OpPrereqError("Instances with disk template %s cannot"
8037 " be %s to arbitrary nodes"
8038 " (neither an iallocator nor a target"
8039 " node can be passed)" %
8040 (instance.disk_template, text),
8042 nodeinfo = self.cfg.GetNodeInfo(target_node)
8043 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8044 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8045 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8046 ignore=self.ignore_ipolicy)
8048 i_be = cluster.FillBE(instance)
8050 # check memory requirements on the secondary node
8051 if (not self.cleanup and
8052 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8053 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8054 "migrating instance %s" %
8056 i_be[constants.BE_MINMEM],
8057 instance.hypervisor)
8059 self.lu.LogInfo("Not checking memory on the secondary node as"
8060 " instance will not be started")
8062 # check if failover must be forced instead of migration
8063 if (not self.cleanup and not self.failover and
8064 i_be[constants.BE_ALWAYS_FAILOVER]):
8066 self.lu.LogInfo("Instance configured to always failover; fallback"
8068 self.failover = True
8070 raise errors.OpPrereqError("This instance has been configured to"
8071 " always failover, please allow failover",
8074 # check bridge existence
8075 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8077 if not self.cleanup:
8078 _CheckNodeNotDrained(self.lu, target_node)
8079 if not self.failover:
8080 result = self.rpc.call_instance_migratable(instance.primary_node,
8082 if result.fail_msg and self.fallback:
8083 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8085 self.failover = True
8087 result.Raise("Can't migrate, please use failover",
8088 prereq=True, ecode=errors.ECODE_STATE)
8090 assert not (self.failover and self.cleanup)
8092 if not self.failover:
8093 if self.lu.op.live is not None and self.lu.op.mode is not None:
8094 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8095 " parameters are accepted",
8097 if self.lu.op.live is not None:
8099 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8101 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8102 # reset the 'live' parameter to None so that repeated
8103 # invocations of CheckPrereq do not raise an exception
8104 self.lu.op.live = None
8105 elif self.lu.op.mode is None:
8106 # read the default value from the hypervisor
8107 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8108 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8110 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8112 # Failover is never live
8115 if not (self.failover or self.cleanup):
8116 remote_info = self.rpc.call_instance_info(instance.primary_node,
8118 instance.hypervisor)
8119 remote_info.Raise("Error checking instance on node %s" %
8120 instance.primary_node)
8121 instance_running = bool(remote_info.payload)
8122 if instance_running:
8123 self.current_mem = int(remote_info.payload["memory"])
8125 def _RunAllocator(self):
8126 """Run the allocator based on input opcode.
8129 # FIXME: add a self.ignore_ipolicy option
8130 ial = IAllocator(self.cfg, self.rpc,
8131 mode=constants.IALLOCATOR_MODE_RELOC,
8132 name=self.instance_name,
8133 relocate_from=[self.instance.primary_node],
8136 ial.Run(self.lu.op.iallocator)
8139 raise errors.OpPrereqError("Can't compute nodes using"
8140 " iallocator '%s': %s" %
8141 (self.lu.op.iallocator, ial.info),
8143 if len(ial.result) != ial.required_nodes:
8144 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8145 " of nodes (%s), required %s" %
8146 (self.lu.op.iallocator, len(ial.result),
8147 ial.required_nodes), errors.ECODE_FAULT)
8148 self.target_node = ial.result[0]
8149 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8150 self.instance_name, self.lu.op.iallocator,
8151 utils.CommaJoin(ial.result))
8153 def _WaitUntilSync(self):
8154 """Poll with custom rpc for disk sync.
8156 This uses our own step-based rpc call.
8159 self.feedback_fn("* wait until resync is done")
8163 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8165 (self.instance.disks,
8168 for node, nres in result.items():
8169 nres.Raise("Cannot resync disks on node %s" % node)
8170 node_done, node_percent = nres.payload
8171 all_done = all_done and node_done
8172 if node_percent is not None:
8173 min_percent = min(min_percent, node_percent)
8175 if min_percent < 100:
8176 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8179 def _EnsureSecondary(self, node):
8180 """Demote a node to secondary.
8183 self.feedback_fn("* switching node %s to secondary mode" % node)
8185 for dev in self.instance.disks:
8186 self.cfg.SetDiskID(dev, node)
8188 result = self.rpc.call_blockdev_close(node, self.instance.name,
8189 self.instance.disks)
8190 result.Raise("Cannot change disk to secondary on node %s" % node)
8192 def _GoStandalone(self):
8193 """Disconnect from the network.
8196 self.feedback_fn("* changing into standalone mode")
8197 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8198 self.instance.disks)
8199 for node, nres in result.items():
8200 nres.Raise("Cannot disconnect disks node %s" % node)
8202 def _GoReconnect(self, multimaster):
8203 """Reconnect to the network.
8206 if multimaster:
8207 msg = "dual-master"
8208 else:
8209 msg = "single-master"
8210 self.feedback_fn("* changing disks into %s mode" % msg)
8211 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8212 (self.instance.disks, self.instance),
8213 self.instance.name, multimaster)
8214 for node, nres in result.items():
8215 nres.Raise("Cannot change disks config on node %s" % node)
8217 def _ExecCleanup(self):
8218 """Try to cleanup after a failed migration.
8220 The cleanup is done by:
8221 - check that the instance is running only on one node
8222 (and update the config if needed)
8223 - change disks on its secondary node to secondary
8224 - wait until disks are fully synchronized
8225 - disconnect from the network
8226 - change disks into single-master mode
8227 - wait again until disks are fully synchronized
8230 instance = self.instance
8231 target_node = self.target_node
8232 source_node = self.source_node
8234 # check running on only one node
8235 self.feedback_fn("* checking where the instance actually runs"
8236 " (if this hangs, the hypervisor might be in"
8238 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8239 for node, result in ins_l.items():
8240 result.Raise("Can't contact node %s" % node)
8242 runningon_source = instance.name in ins_l[source_node].payload
8243 runningon_target = instance.name in ins_l[target_node].payload
8245 if runningon_source and runningon_target:
8246 raise errors.OpExecError("Instance seems to be running on two nodes,"
8247 " or the hypervisor is confused; you will have"
8248 " to ensure manually that it runs only on one"
8249 " and restart this operation")
8251 if not (runningon_source or runningon_target):
8252 raise errors.OpExecError("Instance does not seem to be running at all;"
8253 " in this case it's safer to repair by"
8254 " running 'gnt-instance stop' to ensure disk"
8255 " shutdown, and then restarting it")
8257 if runningon_target:
8258 # the migration has actually succeeded, we need to update the config
8259 self.feedback_fn("* instance running on secondary node (%s),"
8260 " updating config" % target_node)
8261 instance.primary_node = target_node
8262 self.cfg.Update(instance, self.feedback_fn)
8263 demoted_node = source_node
8265 self.feedback_fn("* instance confirmed to be running on its"
8266 " primary node (%s)" % source_node)
8267 demoted_node = target_node
8269 if instance.disk_template in constants.DTS_INT_MIRROR:
8270 self._EnsureSecondary(demoted_node)
8272 self._WaitUntilSync()
8273 except errors.OpExecError:
8274 # we ignore errors here, since if the device is standalone, it
8275 # won't be able to sync
8277 self._GoStandalone()
8278 self._GoReconnect(False)
8279 self._WaitUntilSync()
8281 self.feedback_fn("* done")
8283 def _RevertDiskStatus(self):
8284 """Try to revert the disk status after a failed migration.
8287 target_node = self.target_node
8288 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8292 self._EnsureSecondary(target_node)
8293 self._GoStandalone()
8294 self._GoReconnect(False)
8295 self._WaitUntilSync()
8296 except errors.OpExecError, err:
8297 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8298 " please try to recover the instance manually;"
8299 " error '%s'" % str(err))
8301 def _AbortMigration(self):
8302 """Call the hypervisor code to abort a started migration.
8305 instance = self.instance
8306 target_node = self.target_node
8307 source_node = self.source_node
8308 migration_info = self.migration_info
8310 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8314 abort_msg = abort_result.fail_msg
8316 logging.error("Aborting migration failed on target node %s: %s",
8317 target_node, abort_msg)
8318 # Don't raise an exception here, as we still have to try to revert the
8319 # disk status, even if this step failed.
8321 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8322 instance, False, self.live)
8323 abort_msg = abort_result.fail_msg
8325 logging.error("Aborting migration failed on source node %s: %s",
8326 source_node, abort_msg)
8328 def _ExecMigration(self):
8329 """Migrate an instance.
8331 The migrate is done by:
8332 - change the disks into dual-master mode
8333 - wait until disks are fully synchronized again
8334 - migrate the instance
8335 - change disks on the new secondary node (the old primary) to secondary
8336 - wait until disks are fully synchronized
8337 - change disks into single-master mode
8340 instance = self.instance
8341 target_node = self.target_node
8342 source_node = self.source_node
8344 # Check for hypervisor version mismatch and warn the user.
8345 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8346 None, [self.instance.hypervisor])
8347 for ninfo in nodeinfo.values():
8348 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8350 (_, _, (src_info, )) = nodeinfo[source_node].payload
8351 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8353 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8354 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8355 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8356 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8357 if src_version != dst_version:
8358 self.feedback_fn("* warning: hypervisor version mismatch between"
8359 " source (%s) and target (%s) node" %
8360 (src_version, dst_version))
8362 self.feedback_fn("* checking disk consistency between source and target")
8363 for (idx, dev) in enumerate(instance.disks):
8364 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8365 raise errors.OpExecError("Disk %s is degraded or not fully"
8366 " synchronized on target node,"
8367 " aborting migration" % idx)
8369 if self.current_mem > self.tgt_free_mem:
8370 if not self.allow_runtime_changes:
8371 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8372 " free memory to fit instance %s on target"
8373 " node %s (have %dMB, need %dMB)" %
8374 (instance.name, target_node,
8375 self.tgt_free_mem, self.current_mem))
8376 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8377 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8380 rpcres.Raise("Cannot modify instance runtime memory")
8382 # First get the migration information from the remote node
8383 result = self.rpc.call_migration_info(source_node, instance)
8384 msg = result.fail_msg
8386 log_err = ("Failed fetching source migration information from %s: %s" %
8388 logging.error(log_err)
8389 raise errors.OpExecError(log_err)
8391 self.migration_info = migration_info = result.payload
8393 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8394 # Then switch the disks to master/master mode
8395 self._EnsureSecondary(target_node)
8396 self._GoStandalone()
8397 self._GoReconnect(True)
8398 self._WaitUntilSync()
8400 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8401 result = self.rpc.call_accept_instance(target_node,
8404 self.nodes_ip[target_node])
8406 msg = result.fail_msg
8408 logging.error("Instance pre-migration failed, trying to revert"
8409 " disk status: %s", msg)
8410 self.feedback_fn("Pre-migration failed, aborting")
8411 self._AbortMigration()
8412 self._RevertDiskStatus()
8413 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8414 (instance.name, msg))
8416 self.feedback_fn("* migrating instance to %s" % target_node)
8417 result = self.rpc.call_instance_migrate(source_node, instance,
8418 self.nodes_ip[target_node],
8420 msg = result.fail_msg
8422 logging.error("Instance migration failed, trying to revert"
8423 " disk status: %s", msg)
8424 self.feedback_fn("Migration failed, aborting")
8425 self._AbortMigration()
8426 self._RevertDiskStatus()
8427 raise errors.OpExecError("Could not migrate instance %s: %s" %
8428 (instance.name, msg))
8430 self.feedback_fn("* starting memory transfer")
8431 last_feedback = time.time()
8433 result = self.rpc.call_instance_get_migration_status(source_node,
8435 msg = result.fail_msg
8436 ms = result.payload # MigrationStatus instance
8437 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8438 logging.error("Instance migration failed, trying to revert"
8439 " disk status: %s", msg)
8440 self.feedback_fn("Migration failed, aborting")
8441 self._AbortMigration()
8442 self._RevertDiskStatus()
8443 raise errors.OpExecError("Could not migrate instance %s: %s" %
8444 (instance.name, msg))
8446 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8447 self.feedback_fn("* memory transfer complete")
8450 if (utils.TimeoutExpired(last_feedback,
8451 self._MIGRATION_FEEDBACK_INTERVAL) and
8452 ms.transferred_ram is not None):
8453 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8454 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8455 last_feedback = time.time()
8457 time.sleep(self._MIGRATION_POLL_INTERVAL)
8459 result = self.rpc.call_instance_finalize_migration_src(source_node,
8463 msg = result.fail_msg
8465 logging.error("Instance migration succeeded, but finalization failed"
8466 " on the source node: %s", msg)
8467 raise errors.OpExecError("Could not finalize instance migration: %s" %
8470 instance.primary_node = target_node
8472 # distribute new instance config to the other nodes
8473 self.cfg.Update(instance, self.feedback_fn)
8475 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8479 msg = result.fail_msg
8481 logging.error("Instance migration succeeded, but finalization failed"
8482 " on the target node: %s", msg)
8483 raise errors.OpExecError("Could not finalize instance migration: %s" %
8486 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(source_node)
8488 self._WaitUntilSync()
8489 self._GoStandalone()
8490 self._GoReconnect(False)
8491 self._WaitUntilSync()
8493 # If the instance's disk template is `rbd' and there was a successful
8494 # migration, unmap the device from the source node.
8495 if self.instance.disk_template == constants.DT_RBD:
8496 disks = _ExpandCheckDisks(instance, instance.disks)
8497 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8499 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8500 msg = result.fail_msg
8502 logging.error("Migration was successful, but couldn't unmap the"
8503 " block device %s on source node %s: %s",
8504 disk.iv_name, source_node, msg)
8505 logging.error("You need to unmap the device %s manually on %s",
8506 disk.iv_name, source_node)
8508 self.feedback_fn("* done")
8510 def _ExecFailover(self):
8511 """Failover an instance.
8513 The failover is done by shutting it down on its present node and
8514 starting it on the secondary.
8517 instance = self.instance
8518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8520 source_node = instance.primary_node
8521 target_node = self.target_node
8523 if instance.admin_state == constants.ADMINST_UP:
8524 self.feedback_fn("* checking disk consistency between source and target")
8525 for (idx, dev) in enumerate(instance.disks):
8526 # for drbd, these are drbd over lvm
8527 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8529 if primary_node.offline:
8530 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8532 (primary_node.name, idx, target_node))
8533 elif not self.ignore_consistency:
8534 raise errors.OpExecError("Disk %s is degraded on target node,"
8535 " aborting failover" % idx)
8537 self.feedback_fn("* not checking disk consistency as instance is not"
8540 self.feedback_fn("* shutting down instance on source node")
8541 logging.info("Shutting down instance %s on node %s",
8542 instance.name, source_node)
8544 result = self.rpc.call_instance_shutdown(source_node, instance,
8545 self.shutdown_timeout)
8546 msg = result.fail_msg
8547 if msg:
8548 if self.ignore_consistency or primary_node.offline:
8549 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8550 " proceeding anyway; please make sure node"
8551 " %s is down; error details: %s",
8552 instance.name, source_node, source_node, msg)
8553 else:
8554 raise errors.OpExecError("Could not shutdown instance %s on"
8555 " node %s: %s" %
8556 (instance.name, source_node, msg))
8558 self.feedback_fn("* deactivating the instance's disks on source node")
8559 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8560 raise errors.OpExecError("Can't shut down the instance's disks")
8562 instance.primary_node = target_node
8563 # distribute new instance config to the other nodes
8564 self.cfg.Update(instance, self.feedback_fn)
8566 # Only start the instance if it's marked as up
8567 if instance.admin_state == constants.ADMINST_UP:
8568 self.feedback_fn("* activating the instance's disks on target node %s" %
8570 logging.info("Starting instance %s on node %s",
8571 instance.name, target_node)
8573 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8574 ignore_secondaries=True)
8576 _ShutdownInstanceDisks(self.lu, instance)
8577 raise errors.OpExecError("Can't activate the instance's disks")
8579 self.feedback_fn("* starting the instance on the target node %s" %
8581 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8583 msg = result.fail_msg
8585 _ShutdownInstanceDisks(self.lu, instance)
8586 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8587 (instance.name, target_node, msg))
8589 def Exec(self, feedback_fn):
8590 """Perform the migration.
8593 self.feedback_fn = feedback_fn
8594 self.source_node = self.instance.primary_node
8596 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8597 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8598 self.target_node = self.instance.secondary_nodes[0]
8599 # Otherwise self.target_node has been populated either
8600 # directly, or through an iallocator.
8602 self.all_nodes = [self.source_node, self.target_node]
8603 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8604 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8607 feedback_fn("Failover instance %s" % self.instance.name)
8608 self._ExecFailover()
8610 feedback_fn("Migrating instance %s" % self.instance.name)
8613 return self._ExecCleanup()
8615 return self._ExecMigration()
8618 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8620 """Wrapper around L{_CreateBlockDevInner}.
8622 This method annotates the root device first.
8625 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8626 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8630 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8632 """Create a tree of block devices on a given node.
8634 If this device type has to be created on secondaries, create it and
8637 If not, just recurse to children keeping the same 'force' value.
8639 @attention: The device has to be annotated already.
8641 @param lu: the lu on whose behalf we execute
8642 @param node: the node on which to create the device
8643 @type instance: L{objects.Instance}
8644 @param instance: the instance which owns the device
8645 @type device: L{objects.Disk}
8646 @param device: the device to create
8647 @type force_create: boolean
8648 @param force_create: whether to force creation of this device; this
8649 will be changed to True whenever we find a device which has
8650 CreateOnSecondary() attribute
8651 @param info: the extra 'metadata' we should attach to the device
8652 (this will be represented as a LVM tag)
8653 @type force_open: boolean
8654 @param force_open: this parameter will be passed to the
8655 L{backend.BlockdevCreate} function where it specifies
8656 whether we run on primary or not, and it affects both
8657 the child assembly and the device's own Open() execution
8660 if device.CreateOnSecondary():
8664 for child in device.children:
8665 _CreateBlockDevInner(lu, node, instance, child, force_create,
8668 if not force_create:
8671 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8674 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8675 """Create a single block device on a given node.
8677 This will not recurse over children of the device, so they must be
8680 @param lu: the lu on whose behalf we execute
8681 @param node: the node on which to create the device
8682 @type instance: L{objects.Instance}
8683 @param instance: the instance which owns the device
8684 @type device: L{objects.Disk}
8685 @param device: the device to create
8686 @param info: the extra 'metadata' we should attach to the device
8687 (this will be represented as a LVM tag)
8688 @type force_open: boolean
8689 @param force_open: this parameter will be passed to the
8690 L{backend.BlockdevCreate} function where it specifies
8691 whether we run on primary or not, and it affects both
8692 the child assembly and the device's own Open() execution
8695 lu.cfg.SetDiskID(device, node)
8696 result = lu.rpc.call_blockdev_create(node, device, device.size,
8697 instance.name, force_open, info)
8698 result.Raise("Can't create block device %s on"
8699 " node %s for instance %s" % (device, node, instance.name))
8700 if device.physical_id is None:
8701 device.physical_id = result.payload
8704 def _GenerateUniqueNames(lu, exts):
8705 """Generate a suitable LV name.
8707 This will generate a logical volume name for the given instance.
8710 results = []
8711 for val in exts:
8712 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8713 results.append("%s%s" % (new_id, val))
8714 return results
8716 def _GetPCIInfo(lu, dev_type):
8719 if hasattr(lu, 'hotplug_info'):
8720 info = lu.hotplug_info
8721 elif hasattr(lu, 'instance') and hasattr(lu.instance, 'hotplug_info'):
8722 return lu.cfg.GetPCIInfo(lu.instance.name, dev_type)
8725 idx = getattr(info, dev_type)
8726 setattr(info, dev_type, idx+1)
8727 pci = info.pci_pool.pop()
8733 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8734 iv_name, p_minor, s_minor):
8735 """Generate a drbd8 device complete with its children.
8738 assert len(vgnames) == len(names) == 2
8739 port = lu.cfg.AllocatePort()
8740 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8742 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8743 logical_id=(vgnames[0], names[0]),
8745 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8746 logical_id=(vgnames[1], names[1]),
8749 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8750 drbd_dev = objects.Disk(idx=disk_idx, pci=pci,
8751 dev_type=constants.LD_DRBD8, size=size,
8752 logical_id=(primary, secondary, port,
8755 children=[dev_data, dev_meta],
8756 iv_name=iv_name, params={})
8757 return drbd_dev
8760 _DISK_TEMPLATE_NAME_PREFIX = {
8761 constants.DT_PLAIN: "",
8762 constants.DT_RBD: ".rbd",
8766 _DISK_TEMPLATE_DEVICE_TYPE = {
8767 constants.DT_PLAIN: constants.LD_LV,
8768 constants.DT_FILE: constants.LD_FILE,
8769 constants.DT_SHARED_FILE: constants.LD_FILE,
8770 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8771 constants.DT_RBD: constants.LD_RBD,
8775 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8776 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8777 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8778 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8779 """Generate the entire disk layout for a given template type.
8782 #TODO: compute space requirements
8784 vgname = lu.cfg.GetVGName()
8785 disk_count = len(disk_info)
8788 if template_name == constants.DT_DISKLESS:
8790 elif template_name == constants.DT_DRBD8:
8791 if len(secondary_nodes) != 1:
8792 raise errors.ProgrammerError("Wrong template configuration")
8793 remote_node = secondary_nodes[0]
8794 minors = lu.cfg.AllocateDRBDMinor(
8795 [primary_node, remote_node] * len(disk_info), instance_name)
8797 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8799 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8802 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8803 for i in range(disk_count)]):
8804 names.append(lv_prefix + "_data")
8805 names.append(lv_prefix + "_meta")
8806 for idx, disk in enumerate(disk_info):
8807 disk_index = idx + base_index
8808 data_vg = disk.get(constants.IDISK_VG, vgname)
8809 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8810 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8811 disk[constants.IDISK_SIZE],
8813 names[idx * 2:idx * 2 + 2],
8814 "disk/%d" % disk_index,
8815 minors[idx * 2], minors[idx * 2 + 1])
8816 disk_dev.mode = disk[constants.IDISK_MODE]
8817 disks.append(disk_dev)
8820 raise errors.ProgrammerError("Wrong template configuration")
8822 if template_name == constants.DT_FILE:
8824 elif template_name == constants.DT_SHARED_FILE:
8825 _req_shr_file_storage()
8827 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8828 if name_prefix is None:
8831 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8832 (name_prefix, base_index + i)
8833 for i in range(disk_count)])
8835 if template_name == constants.DT_PLAIN:
8836 def logical_id_fn(idx, _, disk):
8837 vg = disk.get(constants.IDISK_VG, vgname)
8838 return (vg, names[idx])
8839 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8841 lambda _, disk_index, disk: (file_driver,
8842 "%s/disk%d" % (file_storage_dir,
8844 elif template_name == constants.DT_BLOCK:
8846 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8847 disk[constants.IDISK_ADOPT])
8848 elif template_name == constants.DT_RBD:
8849 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8851 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8853 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8855 for idx, disk in enumerate(disk_info):
8856 disk_index = idx + base_index
8857 size = disk[constants.IDISK_SIZE]
8858 feedback_fn("* disk %s, size %s" %
8859 (disk_index, utils.FormatUnit(size, "h")))
8861 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8863 disks.append(objects.Disk(dev_type=dev_type, size=size,
8864 logical_id=logical_id_fn(idx, disk_index, disk),
8865 iv_name="disk/%d" % disk_index,
8866 mode=disk[constants.IDISK_MODE],
8867 params={}, idx=disk_idx, pci=pci))
8872 def _GetInstanceInfoText(instance):
8873 """Compute that text that should be added to the disk's metadata.
8876 return "originstname+%s" % instance.name
8879 def _CalcEta(time_taken, written, total_size):
8880 """Calculates the ETA based on size written and total size.
8882 @param time_taken: The time taken so far
8883 @param written: amount written so far
8884 @param total_size: The total size of data to be written
8885 @return: The remaining time in seconds
8888 avg_time = time_taken / float(written)
8889 return (total_size - written) * avg_time
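# Worked example (illustrative only): with 1024 MiB written out of 4096 MiB
# total in 256 seconds, avg_time is 0.25 s/MiB and the returned ETA is
# (4096 - 1024) * 0.25 = 768 seconds.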
8892 def _WipeDisks(lu, instance):
8893 """Wipes instance disks.
8895 @type lu: L{LogicalUnit}
8896 @param lu: the logical unit on whose behalf we execute
8897 @type instance: L{objects.Instance}
8898 @param instance: the instance whose disks we should wipe
8899 @return: the success of the wipe
8902 node = instance.primary_node
8904 for device in instance.disks:
8905 lu.cfg.SetDiskID(device, node)
8907 logging.info("Pause sync of instance %s disks", instance.name)
8908 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8909 (instance.disks, instance),
8911 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8913 for idx, success in enumerate(result.payload):
8915 logging.warn("pause-sync of instance %s for disks %d failed",
8919 for idx, device in enumerate(instance.disks):
8920 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8921 # MAX_WIPE_CHUNK at max
8922 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8923 constants.MIN_WIPE_CHUNK_PERCENT)
8924 # we _must_ make this an int, otherwise rounding errors will
8926 wipe_chunk_size = int(wipe_chunk_size)
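# Illustrative numbers only, assuming the usual values of MAX_WIPE_CHUNK =
# 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10 (both defined in constants.py, not
# shown here): a 4096 MiB disk is wiped in chunks of min(1024, 409.6) -> 409
# MiB, while disks of 10240 MiB and larger are capped at 1024 MiB per chunk.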
8928 lu.LogInfo("* Wiping disk %d", idx)
8929 logging.info("Wiping disk %d for instance %s, node %s using"
8930 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8935 start_time = time.time()
8937 while offset < size:
8938 wipe_size = min(wipe_chunk_size, size - offset)
8939 logging.debug("Wiping disk %d, offset %s, chunk %s",
8940 idx, offset, wipe_size)
8941 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8943 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8944 (idx, offset, wipe_size))
8947 if now - last_output >= 60:
8948 eta = _CalcEta(now - start_time, offset, size)
8949 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8950 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8953 logging.info("Resume sync of instance %s disks", instance.name)
8955 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8956 (instance.disks, instance),
8960 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8961 " please have a look at the status and troubleshoot"
8962 " the issue: %s", node, result.fail_msg)
8964 for idx, success in enumerate(result.payload):
8966 lu.LogWarning("Resume sync of disk %d failed, please have a"
8967 " look at the status and troubleshoot the issue", idx)
8968 logging.warn("resume-sync of instance %s for disks %d failed",
8972 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8973 """Create all disks for an instance.
8975 This abstracts away some work from AddInstance.
8977 @type lu: L{LogicalUnit}
8978 @param lu: the logical unit on whose behalf we execute
8979 @type instance: L{objects.Instance}
8980 @param instance: the instance whose disks we should create
8982 @param to_skip: list of indices to skip
8983 @type target_node: string
8984 @param target_node: if passed, overrides the target node for creation
8986 @return: the success of the creation
8989 info = _GetInstanceInfoText(instance)
8990 if target_node is None:
8991 pnode = instance.primary_node
8992 all_nodes = instance.all_nodes
8997 if instance.disk_template in constants.DTS_FILEBASED:
8998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8999 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9001 result.Raise("Failed to create directory '%s' on"
9002 " node %s" % (file_storage_dir, pnode))
9004 # Note: this needs to be kept in sync with adding of disks in
9005 # LUInstanceSetParams
9006 for idx, device in enumerate(instance.disks):
9007 if to_skip and idx in to_skip:
9009 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9011 for node in all_nodes:
9012 f_create = node == pnode
9013 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9016 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9017 """Remove all disks for an instance.
9019 This abstracts away some work from `AddInstance()` and
9020 `RemoveInstance()`. Note that in case some of the devices couldn't
9021 be removed, the removal will continue with the other ones (compare
9022 with `_CreateDisks()`).
9024 @type lu: L{LogicalUnit}
9025 @param lu: the logical unit on whose behalf we execute
9026 @type instance: L{objects.Instance}
9027 @param instance: the instance whose disks we should remove
9028 @type target_node: string
9029 @param target_node: used to override the node on which to remove the disks
9031 @return: the success of the removal
9034 logging.info("Removing block devices for instance %s", instance.name)
9037 ports_to_release = set()
9038 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9039 for (idx, device) in enumerate(anno_disks):
9041 edata = [(target_node, device)]
9043 edata = device.ComputeNodeTree(instance.primary_node)
9044 for node, disk in edata:
9045 lu.cfg.SetDiskID(disk, node)
9046 result = lu.rpc.call_blockdev_remove(node, disk)
9048 lu.LogWarning("Could not remove disk %s on node %s,"
9049 " continuing anyway: %s", idx, node, result.fail_msg)
9050 if not (result.offline and node != instance.primary_node):
9053 # if this is a DRBD disk, return its port to the pool
9054 if device.dev_type in constants.LDS_DRBD:
9055 ports_to_release.add(device.logical_id[2])
9057 if all_result or ignore_failures:
9058 for port in ports_to_release:
9059 lu.cfg.AddTcpUdpPort(port)
9061 if instance.disk_template == constants.DT_FILE:
9062 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9066 tgt = instance.primary_node
9067 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9069 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9070 file_storage_dir, instance.primary_node, result.fail_msg)
9076 def _ComputeDiskSizePerVG(disk_template, disks):
9077 """Compute disk size requirements in the volume group
9080 def _compute(disks, payload):
9081 """Universal algorithm.
9084 vgs = {}
9085 for disk in disks:
9086 vgs[disk[constants.IDISK_VG]] = \
9087 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9089 return vgs
9091 # Required free disk space as a function of disk and swap space
9093 constants.DT_DISKLESS: {},
9094 constants.DT_PLAIN: _compute(disks, 0),
9095 # 128 MB are added for drbd metadata for each disk
9096 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9097 constants.DT_FILE: {},
9098 constants.DT_SHARED_FILE: {},
9101 if disk_template not in req_size_dict:
9102 raise errors.ProgrammerError("Disk template '%s' size requirement"
9103 " is unknown" % disk_template)
9105 return req_size_dict[disk_template]
9108 def _ComputeDiskSize(disk_template, disks):
9109 """Compute disk size requirements according to disk template
9112 # Required free disk space as a function of disk and swap space
9114 constants.DT_DISKLESS: None,
9115 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9116 # 128 MB are added for drbd metadata for each disk
9118 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9119 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9120 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9121 constants.DT_BLOCK: 0,
9122 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9125 if disk_template not in req_size_dict:
9126 raise errors.ProgrammerError("Disk template '%s' size requirement"
9127 " is unknown" % disk_template)
9129 return req_size_dict[disk_template]
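# Worked example: two 10240 MiB disks with the DT_DRBD8 template require
# 2 * (10240 + 128) = 20736 MiB, taking DRBD_META_SIZE to be the 128 MB of
# per-disk DRBD metadata mentioned in the comment above.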
9132 def _FilterVmNodes(lu, nodenames):
9133 """Filters out non-vm_capable nodes from a list.
9135 @type lu: L{LogicalUnit}
9136 @param lu: the logical unit for which we check
9137 @type nodenames: list
9138 @param nodenames: the list of nodes on which we should check
9140 @return: the list of vm-capable nodes
9143 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9144 return [name for name in nodenames if name not in vm_nodes]
9147 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9148 """Hypervisor parameter validation.
9150 This function abstracts the hypervisor parameter validation to be
9151 used in both instance create and instance modify.
9153 @type lu: L{LogicalUnit}
9154 @param lu: the logical unit for which we check
9155 @type nodenames: list
9156 @param nodenames: the list of nodes on which we should check
9157 @type hvname: string
9158 @param hvname: the name of the hypervisor we should use
9159 @type hvparams: dict
9160 @param hvparams: the parameters which we need to check
9161 @raise errors.OpPrereqError: if the parameters are not valid
9164 nodenames = _FilterVmNodes(lu, nodenames)
9166 cluster = lu.cfg.GetClusterInfo()
9167 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9169 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9170 for node in nodenames:
9174 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9177 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9178 """OS parameters validation.
9180 @type lu: L{LogicalUnit}
9181 @param lu: the logical unit for which we check
9182 @type required: boolean
9183 @param required: whether the validation should fail if the OS is not
9185 @type nodenames: list
9186 @param nodenames: the list of nodes on which we should check
9187 @type osname: string
9188 @param osname: the name of the OS we should use
9189 @type osparams: dict
9190 @param osparams: the parameters which we need to check
9191 @raise errors.OpPrereqError: if the parameters are not valid
9194 nodenames = _FilterVmNodes(lu, nodenames)
9195 result = lu.rpc.call_os_validate(nodenames, required, osname,
9196 [constants.OS_VALIDATE_PARAMETERS],
9198 for node, nres in result.items():
9199 # we don't check for offline cases since this should be run only
9200 # against the master node and/or an instance's nodes
9201 nres.Raise("OS Parameters validation failed on node %s" % node)
9202 if not nres.payload:
9203 lu.LogInfo("OS %s not found on node %s, validation skipped",
9207 class LUInstanceCreate(LogicalUnit):
9208 """Create an instance.
9211 HPATH = "instance-add"
9212 HTYPE = constants.HTYPE_INSTANCE
9215 def CheckArguments(self):
9219 # do not require name_check to ease forward/backward compatibility
9221 if self.op.no_install and self.op.start:
9222 self.LogInfo("No-installation mode selected, disabling startup")
9223 self.op.start = False
9224 # validate/normalize the instance name
9225 self.op.instance_name = \
9226 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9228 if self.op.ip_check and not self.op.name_check:
9229 # TODO: make the ip check more flexible and not depend on the name check
9230 raise errors.OpPrereqError("Cannot do IP address check without a name"
9231 " check", errors.ECODE_INVAL)
9233 # check nics' parameter names
9234 for nic in self.op.nics:
9235 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9237 # check disks. parameter names and consistent adopt/no-adopt strategy
9238 has_adopt = has_no_adopt = False
9239 for disk in self.op.disks:
9240 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9241 if constants.IDISK_ADOPT in disk:
9245 if has_adopt and has_no_adopt:
9246 raise errors.OpPrereqError("Either all disks are adopted or none is",
9249 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9250 raise errors.OpPrereqError("Disk adoption is not supported for the"
9251 " '%s' disk template" %
9252 self.op.disk_template,
9254 if self.op.iallocator is not None:
9255 raise errors.OpPrereqError("Disk adoption not allowed with an"
9256 " iallocator script", errors.ECODE_INVAL)
9257 if self.op.mode == constants.INSTANCE_IMPORT:
9258 raise errors.OpPrereqError("Disk adoption not allowed for"
9259 " instance import", errors.ECODE_INVAL)
9261 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9262 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9263 " but no 'adopt' parameter given" %
9264 self.op.disk_template,
9267 self.adopt_disks = has_adopt
9269 # instance name verification
9270 if self.op.name_check:
9271 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9272 self.op.instance_name = self.hostname1.name
9273 # used in CheckPrereq for ip ping check
9274 self.check_ip = self.hostname1.ip
9276 self.check_ip = None
9278 # file storage checks
9279 if (self.op.file_driver and
9280 not self.op.file_driver in constants.FILE_DRIVER):
9281 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9282 self.op.file_driver, errors.ECODE_INVAL)
9284 if self.op.disk_template == constants.DT_FILE:
9285 opcodes.RequireFileStorage()
9286 elif self.op.disk_template == constants.DT_SHARED_FILE:
9287 opcodes.RequireSharedFileStorage()
9289 ### Node/iallocator related checks
9290 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9292 if self.op.pnode is not None:
9293 if self.op.disk_template in constants.DTS_INT_MIRROR:
9294 if self.op.snode is None:
9295 raise errors.OpPrereqError("The networked disk templates need"
9296 " a mirror node", errors.ECODE_INVAL)
9298 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9300 self.op.snode = None
9302 self._cds = _GetClusterDomainSecret()
9304 if self.op.mode == constants.INSTANCE_IMPORT:
9305 # On import force_variant must be True, because if we forced it at
9306 # initial install, our only chance when importing it back is that it
9308 self.op.force_variant = True
9310 if self.op.no_install:
9311 self.LogInfo("No-installation mode has no effect during import")
9313 elif self.op.mode == constants.INSTANCE_CREATE:
9314 if self.op.os_type is None:
9315 raise errors.OpPrereqError("No guest OS specified",
9317 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9318 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9319 " installation" % self.op.os_type,
9321 if self.op.disk_template is None:
9322 raise errors.OpPrereqError("No disk template specified",
9325 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9326 # Check handshake to ensure both clusters have the same domain secret
9327 src_handshake = self.op.source_handshake
9328 if not src_handshake:
9329 raise errors.OpPrereqError("Missing source handshake",
9332 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
src_handshake)
if errmsg:
9335 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9338 # Load and check source CA
9339 self.source_x509_ca_pem = self.op.source_x509_ca
9340 if not self.source_x509_ca_pem:
9341 raise errors.OpPrereqError("Missing source X509 CA",
try:
9345 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
self._cds)
9347 except OpenSSL.crypto.Error, err:
9348 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9349 (err, ), errors.ECODE_INVAL)
9351 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9352 if errcode is not None:
9353 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9356 self.source_x509_ca = cert
9358 src_instance_name = self.op.source_instance_name
9359 if not src_instance_name:
9360 raise errors.OpPrereqError("Missing source instance name",
9363 self.source_instance_name = \
9364 netutils.GetHostname(name=src_instance_name).name
else:
9367 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9368 self.op.mode, errors.ECODE_INVAL)
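# For illustration only (values invented, a sketch rather than part of the LU):
# a plain DRBD-based creation reaches this LU via an opcode roughly like
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debian-image",
#                                 pnode="node1", snode="node2")
# All of the argument checks above run before any node locks are acquired.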
9370 def ExpandNames(self):
9371 """ExpandNames for CreateInstance.
9373 Figure out the right locks for instance creation.
9376 self.needed_locks = {}
9378 instance_name = self.op.instance_name
9379 # this is just a preventive check, but someone might still add this
9380 # instance in the meantime, and creation will fail at lock-add time
9381 if instance_name in self.cfg.GetInstanceList():
9382 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9383 instance_name, errors.ECODE_EXISTS)
9385 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9387 if self.op.iallocator:
9388 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9389 # specifying a group on instance creation and then selecting nodes from
# that group
9391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9392 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9394 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9395 nodelist = [self.op.pnode]
9396 if self.op.snode is not None:
9397 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9398 nodelist.append(self.op.snode)
9399 self.needed_locks[locking.LEVEL_NODE] = nodelist
9400 # Lock resources of instance's primary and secondary nodes (copy to
9401 # prevent accidental modification)
9402 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9404 # in case of import lock the source node too
9405 if self.op.mode == constants.INSTANCE_IMPORT:
9406 src_node = self.op.src_node
9407 src_path = self.op.src_path
9409 if src_path is None:
9410 self.op.src_path = src_path = self.op.instance_name
9412 if src_node is None:
9413 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9414 self.op.src_node = None
9415 if os.path.isabs(src_path):
9416 raise errors.OpPrereqError("Importing an instance from a path"
9417 " requires a source node option",
errors.ECODE_INVAL)
else:
9420 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9421 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9422 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9423 if not os.path.isabs(src_path):
9424 self.op.src_path = src_path = \
9425 utils.PathJoin(constants.EXPORT_DIR, src_path)
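# For illustration: with an explicit primary/secondary the lock declaration
# built above ends up roughly as
#   {locking.LEVEL_NODE: ["node1", "node2"],
#    locking.LEVEL_NODE_RES: ["node1", "node2"]}
# while iallocator-based creation (and imports without a source node) fall
# back to locking.ALL_SET because the target nodes are not known yet.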
9427 def _RunAllocator(self):
9428 """Run the allocator based on input opcode.
9431 nics = [n.ToDict() for n in self.nics]
9432 ial = IAllocator(self.cfg, self.rpc,
9433 mode=constants.IALLOCATOR_MODE_ALLOC,
9434 name=self.op.instance_name,
9435 disk_template=self.op.disk_template,
9438 vcpus=self.be_full[constants.BE_VCPUS],
9439 memory=self.be_full[constants.BE_MAXMEM],
9440 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9443 hypervisor=self.op.hypervisor,
9446 ial.Run(self.op.iallocator)
9449 raise errors.OpPrereqError("Can't compute nodes using"
9450 " iallocator '%s': %s" %
9451 (self.op.iallocator, ial.info),
9453 if len(ial.result) != ial.required_nodes:
9454 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9455 " of nodes (%s), required %s" %
9456 (self.op.iallocator, len(ial.result),
9457 ial.required_nodes), errors.ECODE_FAULT)
9458 self.op.pnode = ial.result[0]
9459 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9460 self.op.instance_name, self.op.iallocator,
9461 utils.CommaJoin(ial.result))
9462 if ial.required_nodes == 2:
9463 self.op.snode = ial.result[1]
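# For illustration: ial.result is a list of node names chosen by the
# allocator, e.g. ["node3", "node4"] for a mirrored disk template; element 0
# becomes the primary node and element 1 (if requested) the secondary.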
9465 def BuildHooksEnv(self):
9468 This runs on master, primary and secondary nodes of the instance.
env = {
9472 "ADD_MODE": self.op.mode,
}
9474 if self.op.mode == constants.INSTANCE_IMPORT:
9475 env["SRC_NODE"] = self.op.src_node
9476 env["SRC_PATH"] = self.op.src_path
9477 env["SRC_IMAGES"] = self.src_images
9479 env.update(_BuildInstanceHookEnv(
9480 name=self.op.instance_name,
9481 primary_node=self.op.pnode,
9482 secondary_nodes=self.secondaries,
9483 status=self.op.start,
9484 os_type=self.op.os_type,
9485 minmem=self.be_full[constants.BE_MINMEM],
9486 maxmem=self.be_full[constants.BE_MAXMEM],
9487 vcpus=self.be_full[constants.BE_VCPUS],
9488 nics=_NICListToTuple(self, self.nics),
9489 disk_template=self.op.disk_template,
9490 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9491 for d in self.disks],
9494 hypervisor_name=self.op.hypervisor,
9500 def BuildHooksNodes(self):
9501 """Build hooks nodes.
9504 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
return nl, nl
9507 def _ReadExportInfo(self):
9508 """Reads the export information from disk.
9510 It will override the opcode source node and path with the actual
9511 information, if these two were not specified before.
9513 @return: the export information
9516 assert self.op.mode == constants.INSTANCE_IMPORT
9518 src_node = self.op.src_node
9519 src_path = self.op.src_path
9521 if src_node is None:
9522 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9523 exp_list = self.rpc.call_export_list(locked_nodes)
found = False
9525 for node in exp_list:
9526 if exp_list[node].fail_msg:
continue
9528 if src_path in exp_list[node].payload:
found = True
9530 self.op.src_node = src_node = node
9531 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
src_path)
break
if not found:
9535 raise errors.OpPrereqError("No export found for relative path %s" %
9536 src_path, errors.ECODE_INVAL)
9538 _CheckNodeOnline(self, src_node)
9539 result = self.rpc.call_export_info(src_node, src_path)
9540 result.Raise("No export or invalid export found in dir %s" % src_path)
9542 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9543 if not export_info.has_section(constants.INISECT_EXP):
9544 raise errors.ProgrammerError("Corrupted export config",
9545 errors.ECODE_ENVIRON)
9547 ei_version = export_info.get(constants.INISECT_EXP, "version")
9548 if (int(ei_version) != constants.EXPORT_VERSION):
9549 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9550 (ei_version, constants.EXPORT_VERSION),
9551 errors.ECODE_ENVIRON)
return export_info
9554 def _ReadExportParams(self, einfo):
9555 """Use export parameters as defaults.
9557 In case the opcode doesn't specify (as in override) some instance
9558 parameters, then try to use them from the export information, if
9562 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9564 if self.op.disk_template is None:
9565 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9566 self.op.disk_template = einfo.get(constants.INISECT_INS,
"disk_template")
9568 if self.op.disk_template not in constants.DISK_TEMPLATES:
9569 raise errors.OpPrereqError("Disk template specified in configuration"
9570 " file is not one of the allowed values:"
9571 " %s" % " ".join(constants.DISK_TEMPLATES))
else:
9573 raise errors.OpPrereqError("No disk template specified and the export"
9574 " is missing the disk_template information",
9577 if not self.op.disks:
disks = []
9579 # TODO: import the disk iv_name too
9580 for idx in range(constants.MAX_DISKS):
9581 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9582 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9583 disks.append({constants.IDISK_SIZE: disk_sz})
9584 self.op.disks = disks
9585 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9586 raise errors.OpPrereqError("No disk info specified and the export"
9587 " is missing the disk information",
9590 if not self.op.nics:
9592 for idx in range(constants.MAX_NICS):
9593 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9595 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9596 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9603 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9604 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9606 if (self.op.hypervisor is None and
9607 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9608 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9610 if einfo.has_section(constants.INISECT_HYP):
9611 # use the export parameters but do not override the ones
9612 # specified by the user
9613 for name, value in einfo.items(constants.INISECT_HYP):
9614 if name not in self.op.hvparams:
9615 self.op.hvparams[name] = value
9617 if einfo.has_section(constants.INISECT_BEP):
9618 # use the parameters, without overriding
9619 for name, value in einfo.items(constants.INISECT_BEP):
9620 if name not in self.op.beparams:
9621 self.op.beparams[name] = value
9622 # Compatibility for the old "memory" be param
9623 if name == constants.BE_MEMORY:
9624 if constants.BE_MAXMEM not in self.op.beparams:
9625 self.op.beparams[constants.BE_MAXMEM] = value
9626 if constants.BE_MINMEM not in self.op.beparams:
9627 self.op.beparams[constants.BE_MINMEM] = value
else:
9629 # try to read the parameters old style, from the main section
9630 for name in constants.BES_PARAMETERS:
9631 if (name not in self.op.beparams and
9632 einfo.has_option(constants.INISECT_INS, name)):
9633 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9635 if einfo.has_section(constants.INISECT_OSP):
9636 # use the parameters, without overriding
9637 for name, value in einfo.items(constants.INISECT_OSP):
9638 if name not in self.op.osparams:
9639 self.op.osparams[name] = value
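# For illustration only (values invented): the export info consumed above is a
# ConfigParser-style file roughly along the lines of
#   [export]
#   version = 0
#   [instance]
#   name = inst1.example.com
#   disk_template = plain
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56
#   tags = prod web
# with optional hypervisor/backend/OS parameter sections merged in above
# without overriding anything given explicitly in the opcode.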
9641 def _RevertToDefaults(self, cluster):
9642 """Revert the instance parameters to the default values.
9646 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9647 for name in self.op.hvparams.keys():
9648 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9649 del self.op.hvparams[name]
9651 be_defs = cluster.SimpleFillBE({})
9652 for name in self.op.beparams.keys():
9653 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9654 del self.op.beparams[name]
9656 nic_defs = cluster.SimpleFillNIC({})
9657 for nic in self.op.nics:
9658 for name in constants.NICS_PARAMETERS:
9659 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9662 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9663 for name in self.op.osparams.keys():
9664 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9665 del self.op.osparams[name]
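# Example of the effect: if the cluster default for a backend parameter is 1
# and the opcode also asked for 1, the key is dropped here so the instance
# keeps following the cluster default instead of pinning the value.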
9667 def _CalculateFileStorageDir(self):
9668 """Calculate final instance file storage dir.
9671 # file storage dir calculation/check
9672 self.instance_file_storage_dir = None
9673 if self.op.disk_template in constants.DTS_FILEBASED:
9674 # build the full file storage dir path
joinargs = []
9677 if self.op.disk_template == constants.DT_SHARED_FILE:
9678 get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
9680 get_fsd_fn = self.cfg.GetFileStorageDir
9682 cfg_storagedir = get_fsd_fn()
9683 if not cfg_storagedir:
9684 raise errors.OpPrereqError("Cluster file storage dir not defined")
9685 joinargs.append(cfg_storagedir)
9687 if self.op.file_storage_dir is not None:
9688 joinargs.append(self.op.file_storage_dir)
9690 joinargs.append(self.op.instance_name)
9692 # pylint: disable=W0142
9693 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
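# For illustration (paths invented): with a cluster file storage dir of
# /srv/ganeti/file-storage and an opcode file_storage_dir of "web", the
# resulting path is /srv/ganeti/file-storage/web/<instance name>.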
9695 def CheckPrereq(self): # pylint: disable=R0914
9696 """Check prerequisites.
9699 self._CalculateFileStorageDir()
9701 if self.op.mode == constants.INSTANCE_IMPORT:
9702 export_info = self._ReadExportInfo()
9703 self._ReadExportParams(export_info)
9704 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
else:
9706 self._old_instance_name = None
9708 if (not self.cfg.GetVGName() and
9709 self.op.disk_template not in constants.DTS_NOT_LVM):
9710 raise errors.OpPrereqError("Cluster does not support lvm-based"
9711 " instances", errors.ECODE_STATE)
9713 if (self.op.hypervisor is None or
9714 self.op.hypervisor == constants.VALUE_AUTO):
9715 self.op.hypervisor = self.cfg.GetHypervisorType()
9717 cluster = self.cfg.GetClusterInfo()
9718 enabled_hvs = cluster.enabled_hypervisors
9719 if self.op.hypervisor not in enabled_hvs:
9720 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9721 " cluster (%s)" % (self.op.hypervisor,
9722 ",".join(enabled_hvs)),
9725 # Check tag validity
9726 for tag in self.op.tags:
9727 objects.TaggableObject.ValidateTag(tag)
9729 # check hypervisor parameter syntax (locally)
9730 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9731 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9733 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9734 hv_type.CheckParameterSyntax(filled_hvp)
9735 self.hv_full = filled_hvp
9736 # check that we don't specify global parameters on an instance
9737 _CheckGlobalHvParams(self.op.hvparams)
9739 # fill and remember the beparams dict
9740 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9741 for param, value in self.op.beparams.iteritems():
9742 if value == constants.VALUE_AUTO:
9743 self.op.beparams[param] = default_beparams[param]
9744 objects.UpgradeBeParams(self.op.beparams)
9745 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9746 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9748 # build os parameters
9749 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9751 # now that hvp/bep are in final format, let's reset to defaults,
9753 if self.op.identify_defaults:
9754 self._RevertToDefaults(cluster)
9756 self.hotplug_info = None
9758 self.hotplug_info = objects.HotplugInfo(disks=0, nics=0,
9759 pci_pool=list(range(16,32)))
self.nics = []
9762 for idx, nic in enumerate(self.op.nics):
9763 nic_mode_req = nic.get(constants.INIC_MODE, None)
9764 nic_mode = nic_mode_req
9765 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9766 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9768 # in routed mode, for the first nic, the default ip is 'auto'
9769 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9770 default_ip_mode = constants.VALUE_AUTO
9772 default_ip_mode = constants.VALUE_NONE
9774 # ip validity checks
9775 ip = nic.get(constants.INIC_IP, default_ip_mode)
9776 if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
9778 elif ip.lower() == constants.VALUE_AUTO:
9779 if not self.op.name_check:
9780 raise errors.OpPrereqError("IP address set to auto but name checks"
9781 " have been skipped",
9783 nic_ip = self.hostname1.ip
else:
9785 if not netutils.IPAddress.IsValid(ip):
9786 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
errors.ECODE_INVAL)
nic_ip = ip
9790 # TODO: check the ip address for uniqueness
9791 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9792 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9795 # MAC address verification
9796 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9797 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9798 mac = utils.NormalizeAndValidateMac(mac)
try:
9801 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9802 except errors.ReservationError:
9803 raise errors.OpPrereqError("MAC address %s already in use"
9804 " in cluster" % mac,
9805 errors.ECODE_NOTUNIQUE)
9807 # Build nic parameters
9808 link = nic.get(constants.INIC_LINK, None)
9809 if link == constants.VALUE_AUTO:
9810 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
nicparams = {}
9813 nicparams[constants.NIC_MODE] = nic_mode
9815 nicparams[constants.NIC_LINK] = link
9817 check_params = cluster.SimpleFillNIC(nicparams)
9818 objects.NIC.CheckParameterSyntax(check_params)
9819 nic_idx, pci = _GetPCIInfo(self, 'nics')
9820 self.nics.append(objects.NIC(idx=nic_idx, pci=pci,
9822 nicparams=check_params))
9824 # disk checks/pre-build
9825 default_vg = self.cfg.GetVGName()
self.disks = []
9827 for disk in self.op.disks:
9828 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9829 if mode not in constants.DISK_ACCESS_SET:
9830 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9831 mode, errors.ECODE_INVAL)
9832 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
9834 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
9837 except (TypeError, ValueError):
9838 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
errors.ECODE_INVAL)
9841 data_vg = disk.get(constants.IDISK_VG, default_vg)
new_disk = {
9843 constants.IDISK_SIZE: size,
9844 constants.IDISK_MODE: mode,
9845 constants.IDISK_VG: data_vg,
}
9847 if constants.IDISK_METAVG in disk:
9848 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9849 if constants.IDISK_ADOPT in disk:
9850 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9851 self.disks.append(new_disk)
9853 if self.op.mode == constants.INSTANCE_IMPORT:
disk_images = []
9855 for idx in range(len(self.disks)):
9856 option = "disk%d_dump" % idx
9857 if export_info.has_option(constants.INISECT_INS, option):
9858 # FIXME: are the old os-es, disk sizes, etc. useful?
9859 export_name = export_info.get(constants.INISECT_INS, option)
9860 image = utils.PathJoin(self.op.src_path, export_name)
9861 disk_images.append(image)
else:
9863 disk_images.append(False)
9865 self.src_images = disk_images
9867 if self.op.instance_name == self._old_instance_name:
9868 for idx, nic in enumerate(self.nics):
9869 if nic.mac == constants.VALUE_AUTO:
9870 nic_mac_ini = "nic%d_mac" % idx
9871 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9873 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9875 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9876 if self.op.ip_check:
9877 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9878 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9879 (self.check_ip, self.op.instance_name),
9880 errors.ECODE_NOTUNIQUE)
9882 #### mac address generation
9883 # By generating here the mac address both the allocator and the hooks get
9884 # the real final mac address rather than the 'auto' or 'generate' value.
9885 # There is a race condition between the generation and the instance object
9886 # creation, which means that we know the mac is valid now, but we're not
9887 # sure it will be when we actually add the instance. If things go bad
9888 # adding the instance will abort because of a duplicate mac, and the
9889 # creation job will fail.
9890 for nic in self.nics:
9891 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9892 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9896 if self.op.iallocator is not None:
9897 self._RunAllocator()
9899 # Release all unneeded node locks
9900 _ReleaseLocks(self, locking.LEVEL_NODE,
9901 keep=filter(None, [self.op.pnode, self.op.snode,
self.op.src_node]))
9903 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9904 keep=filter(None, [self.op.pnode, self.op.snode,
self.op.src_node]))
9907 #### node related checks
9909 # check primary node
9910 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9911 assert self.pnode is not None, \
9912 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
9914 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9915 pnode.name, errors.ECODE_STATE)
if pnode.drained:
9917 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9918 pnode.name, errors.ECODE_STATE)
9919 if not pnode.vm_capable:
9920 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9921 " '%s'" % pnode.name, errors.ECODE_STATE)
9923 self.secondaries = []
9925 # mirror node verification
9926 if self.op.disk_template in constants.DTS_INT_MIRROR:
9927 if self.op.snode == pnode.name:
9928 raise errors.OpPrereqError("The secondary node cannot be the"
9929 " primary node", errors.ECODE_INVAL)
9930 _CheckNodeOnline(self, self.op.snode)
9931 _CheckNodeNotDrained(self, self.op.snode)
9932 _CheckNodeVmCapable(self, self.op.snode)
9933 self.secondaries.append(self.op.snode)
9935 snode = self.cfg.GetNodeInfo(self.op.snode)
9936 if pnode.group != snode.group:
9937 self.LogWarning("The primary and secondary nodes are in two"
9938 " different node groups; the disk parameters"
9939 " from the first disk's node group will be"
" used")
9942 nodenames = [pnode.name] + self.secondaries
9944 # Verify instance specs
9945 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
ispec = {
9947 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9948 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9949 constants.ISPEC_DISK_COUNT: len(self.disks),
9950 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9951 constants.ISPEC_NIC_COUNT: len(self.nics),
9952 constants.ISPEC_SPINDLE_USE: spindle_use,
}
9955 group_info = self.cfg.GetNodeGroup(pnode.group)
9956 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9957 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9958 if not self.op.ignore_ipolicy and res:
9959 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9960 " policy: %s") % (pnode.group,
9961 utils.CommaJoin(res)),
9964 if not self.adopt_disks:
9965 if self.op.disk_template == constants.DT_RBD:
9966 # _CheckRADOSFreeSpace() is just a placeholder.
9967 # Any function that checks prerequisites can be placed here.
9968 # Check if there is enough space on the RADOS cluster.
9969 _CheckRADOSFreeSpace()
9971 # Check lv size requirements, if not adopting
9972 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9973 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9975 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9976 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9977 disk[constants.IDISK_ADOPT])
9978 for disk in self.disks])
9979 if len(all_lvs) != len(self.disks):
9980 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9982 for lv_name in all_lvs:
9984 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9985 # to ReserveLV uses the same syntax
9986 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9987 except errors.ReservationError:
9988 raise errors.OpPrereqError("LV named %s used by another instance" %
9989 lv_name, errors.ECODE_NOTUNIQUE)
9991 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9992 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9994 node_lvs = self.rpc.call_lv_list([pnode.name],
9995 vg_names.payload.keys())[pnode.name]
9996 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9997 node_lvs = node_lvs.payload
9999 delta = all_lvs.difference(node_lvs.keys())
if delta:
10001 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10002 utils.CommaJoin(delta),
10003 errors.ECODE_INVAL)
10004 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
if online_lvs:
10006 raise errors.OpPrereqError("Online logical volumes found, cannot"
10007 " adopt: %s" % utils.CommaJoin(online_lvs),
10008 errors.ECODE_STATE)
10009 # update the size of disk based on what is found
10010 for dsk in self.disks:
10011 dsk[constants.IDISK_SIZE] = \
10012 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10013 dsk[constants.IDISK_ADOPT])][0]))
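# Note: node_lvs maps "vg/lv" names to LV information in which, as used above,
# field 0 carries the size (overwriting the requested disk size) and field 2
# the "online" (in use) flag.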
10015 elif self.op.disk_template == constants.DT_BLOCK:
10016 # Normalize and de-duplicate device paths
10017 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10018 for disk in self.disks])
10019 if len(all_disks) != len(self.disks):
10020 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10021 errors.ECODE_INVAL)
10022 baddisks = [d for d in all_disks
10023 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
if baddisks:
10025 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10026 " cannot be adopted" %
10027 (", ".join(baddisks),
10028 constants.ADOPTABLE_BLOCKDEV_ROOT),
10029 errors.ECODE_INVAL)
10031 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10032 list(all_disks))[pnode.name]
10033 node_disks.Raise("Cannot get block device information from node %s" %
10035 node_disks = node_disks.payload
10036 delta = all_disks.difference(node_disks.keys())
if delta:
10038 raise errors.OpPrereqError("Missing block device(s): %s" %
10039 utils.CommaJoin(delta),
10040 errors.ECODE_INVAL)
10041 for dsk in self.disks:
10042 dsk[constants.IDISK_SIZE] = \
10043 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
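# Note: for block device adoption the payload of call_bdev_sizes maps each
# device path to its size, which likewise overwrites the requested disk size.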
10045 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10047 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10048 # check OS parameters (remotely)
10049 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10051 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10053 # memory check on primary node
10054 #TODO(dynmem): use MINMEM for checking
10056 _CheckNodeFreeMemory(self, self.pnode.name,
10057 "creating instance %s" % self.op.instance_name,
10058 self.be_full[constants.BE_MAXMEM],
10059 self.op.hypervisor)
10061 self.dry_run_result = list(nodenames)
10063 def Exec(self, feedback_fn):
10064 """Create and add the instance to the cluster.
10067 instance = self.op.instance_name
10068 pnode_name = self.pnode.name
10070 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10071 self.owned_locks(locking.LEVEL_NODE)), \
10072 "Node locks differ from node resource locks"
10074 ht_kind = self.op.hypervisor
10075 if ht_kind in constants.HTS_REQ_PORT:
10076 network_port = self.cfg.AllocatePort()
else:
10078 network_port = None
10080 # This is ugly, but we have a chicken-and-egg problem here:
10081 # We can only take the group disk parameters, as the instance
10082 # has no disks yet (we are generating them right here).
10083 node = self.cfg.GetNodeInfo(pnode_name)
10084 nodegroup = self.cfg.GetNodeGroup(node.group)
10085 disks = _GenerateDiskTemplate(self,
10086 self.op.disk_template,
10087 instance, pnode_name,
10090 self.instance_file_storage_dir,
10091 self.op.file_driver,
10094 self.cfg.GetGroupDiskParams(nodegroup))
10096 iobj = objects.Instance(name=instance, os=self.op.os_type,
10097 primary_node=pnode_name,
10098 nics=self.nics, disks=disks,
10099 disk_template=self.op.disk_template,
10100 admin_state=constants.ADMINST_DOWN,
10101 network_port=network_port,
10102 beparams=self.op.beparams,
10103 hvparams=self.op.hvparams,
10104 hypervisor=self.op.hypervisor,
10105 osparams=self.op.osparams,
10106 hotplug_info=self.hotplug_info,
)
10110 for tag in self.op.tags:
iobj.AddTag(tag)
10113 if self.adopt_disks:
10114 if self.op.disk_template == constants.DT_PLAIN:
10115 # rename LVs to the newly-generated names; we need to construct
10116 # 'fake' LV disks with the old data, plus the new unique_id
10117 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10119 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10120 rename_to.append(t_dsk.logical_id)
10121 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10122 self.cfg.SetDiskID(t_dsk, pnode_name)
10123 result = self.rpc.call_blockdev_rename(pnode_name,
10124 zip(tmp_disks, rename_to))
10125 result.Raise("Failed to rename adopted LVs")
10127 feedback_fn("* creating instance disks...")
try:
10129 _CreateDisks(self, iobj)
10130 except errors.OpExecError:
10131 self.LogWarning("Device creation failed, reverting...")
try:
10133 _RemoveDisks(self, iobj)
finally:
10135 self.cfg.ReleaseDRBDMinors(instance)
raise
10138 feedback_fn("adding instance %s to cluster config" % instance)
10140 self.cfg.AddInstance(iobj, self.proc.GetECId())
10142 # Declare that we don't want to remove the instance lock anymore, as we've
10143 # added the instance to the config
10144 del self.remove_locks[locking.LEVEL_INSTANCE]
10146 if self.op.mode == constants.INSTANCE_IMPORT:
10147 # Release unused nodes
10148 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10150 # Release all nodes
10151 _ReleaseLocks(self, locking.LEVEL_NODE)
10154 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10155 feedback_fn("* wiping instance disks...")
try:
10157 _WipeDisks(self, iobj)
10158 except errors.OpExecError, err:
10159 logging.exception("Wiping disks failed")
10160 self.LogWarning("Wiping instance disks failed (%s)", err)
disk_abort = True
if disk_abort:
10164 # Something is already wrong with the disks, don't do anything else
pass
10166 elif self.op.wait_for_sync:
10167 disk_abort = not _WaitForSync(self, iobj)
10168 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10169 # make sure the disks are not degraded (still sync-ing is ok)
10170 feedback_fn("* checking mirrors status")
10171 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False
if disk_abort:
10176 _RemoveDisks(self, iobj)
10177 self.cfg.RemoveInstance(iobj.name)
10178 # Make sure the instance lock gets removed
10179 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10180 raise errors.OpExecError("There are some degraded disks for"
" this instance")
10183 # Release all node resource locks
10184 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10186 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10187 # we need to set the disks ID to the primary node, since the
10188 # preceding code might or might have not done it, depending on
10189 # disk template and other options
10190 for disk in iobj.disks:
10191 self.cfg.SetDiskID(disk, pnode_name)
10192 if self.op.mode == constants.INSTANCE_CREATE:
10193 if not self.op.no_install:
10194 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10195 not self.op.wait_for_sync)
if pause_sync:
10197 feedback_fn("* pausing disk sync to install instance OS")
10198 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10201 for idx, success in enumerate(result.payload):
if not success:
10203 logging.warn("pause-sync of instance %s for disk %d failed",
instance, idx)
10206 feedback_fn("* running the instance OS create scripts...")
10207 # FIXME: pass debug option from opcode to backend
os_add_result = \
10209 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10210 self.op.debug_level)
10212 feedback_fn("* resuming disk sync")
10213 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10216 for idx, success in enumerate(result.payload):
if not success:
10218 logging.warn("resume-sync of instance %s for disk %d failed",
instance, idx)
10221 os_add_result.Raise("Could not add os for instance %s"
10222 " on node %s" % (instance, pnode_name))
10225 if self.op.mode == constants.INSTANCE_IMPORT:
10226 feedback_fn("* running the instance OS import scripts...")
transfers = []
10230 for idx, image in enumerate(self.src_images):
10234 # FIXME: pass debug option from opcode to backend
10235 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10236 constants.IEIO_FILE, (image, ),
10237 constants.IEIO_SCRIPT,
10238 (iobj.disks[idx], idx),
10240 transfers.append(dt)
import_result = \
10243 masterd.instance.TransferInstanceData(self, feedback_fn,
10244 self.op.src_node, pnode_name,
10245 self.pnode.secondary_ip,
10247 if not compat.all(import_result):
10248 self.LogWarning("Some disks for instance %s on node %s were not"
10249 " imported successfully" % (instance, pnode_name))
10251 rename_from = self._old_instance_name
10253 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10254 feedback_fn("* preparing remote import...")
10255 # The source cluster will stop the instance before attempting to make
10256 # a connection. In some cases stopping an instance can take a long
10257 # time, hence the shutdown timeout is added to the connection
10259 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10260 self.op.source_shutdown_timeout)
10261 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10263 assert iobj.primary_node == self.pnode.name
disk_results = \
10265 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10266 self.source_x509_ca,
10267 self._cds, timeouts)
10268 if not compat.all(disk_results):
10269 # TODO: Should the instance still be started, even if some disks
10270 # failed to import (valid for local imports, too)?
10271 self.LogWarning("Some disks for instance %s on node %s were not"
10272 " imported successfully" % (instance, pnode_name))
10274 rename_from = self.source_instance_name
else:
10277 # also checked in the prereq part
10278 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
10281 # Run rename script on newly imported instance
10282 assert iobj.name == instance
10283 feedback_fn("Running rename script for %s" % instance)
10284 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
rename_from,
10286 self.op.debug_level)
10287 if result.fail_msg:
10288 self.LogWarning("Failed to run rename script for %s on node"
10289 " %s: %s" % (instance, pnode_name, result.fail_msg))
10291 assert not self.owned_locks(locking.LEVEL_NODE_RES)
if self.op.start:
10294 iobj.admin_state = constants.ADMINST_UP
10295 self.cfg.Update(iobj, feedback_fn)
10296 logging.info("Starting instance %s on node %s", instance, pnode_name)
10297 feedback_fn("* starting instance...")
10298 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10300 result.Raise("Could not start instance")
10302 return list(iobj.all_nodes)
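# For illustration only (option spelling assumed, may differ between
# versions): this LU is what ultimately serves a command line such as
#   gnt-instance add -t drbd -n node1:node2 -o debian-image \
#     --disk 0:size=10g inst1.example.com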
10305 def _CheckRADOSFreeSpace():
10306 """Compute disk size requirements inside the RADOS cluster.
10309 # For the RADOS cluster we assume there is always enough space.
pass
10313 class LUInstanceConsole(NoHooksLU):
10314 """Connect to an instance's console.
10316 This is somewhat special in that it returns the command line that
10317 you need to run on the master node in order to connect to the
console.
10323 def ExpandNames(self):
10324 self.share_locks = _ShareAll()
10325 self._ExpandAndLockInstance()
10327 def CheckPrereq(self):
10328 """Check prerequisites.
10330 This checks that the instance is in the cluster.
10333 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10334 assert self.instance is not None, \
10335 "Cannot retrieve locked instance %s" % self.op.instance_name
10336 _CheckNodeOnline(self, self.instance.primary_node)
10338 def Exec(self, feedback_fn):
10339 """Connect to the console of an instance
10342 instance = self.instance
10343 node = instance.primary_node
10345 node_insts = self.rpc.call_instance_list([node],
10346 [instance.hypervisor])[node]
10347 node_insts.Raise("Can't get node information from %s" % node)
10349 if instance.name not in node_insts.payload:
10350 if instance.admin_state == constants.ADMINST_UP:
10351 state = constants.INSTST_ERRORDOWN
10352 elif instance.admin_state == constants.ADMINST_DOWN:
10353 state = constants.INSTST_ADMINDOWN
10355 state = constants.INSTST_ADMINOFFLINE
10356 raise errors.OpExecError("Instance %s is not running (state %s)" %
10357 (instance.name, state))
10359 logging.debug("Connecting to console of %s on %s", instance.name, node)
10361 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10364 def _GetInstanceConsole(cluster, instance):
10365 """Returns console information for an instance.
10367 @type cluster: L{objects.Cluster}
10368 @type instance: L{objects.Instance}
10372 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10373 # beparams and hvparams are passed separately, to avoid editing the
10374 # instance and then saving the defaults in the instance itself.
10375 hvparams = cluster.FillHV(instance)
10376 beparams = cluster.FillBE(instance)
10377 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10379 assert console.instance == instance.name
10380 assert console.Validate()
10382 return console.ToDict()
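# Note: the exact fields of the returned dict depend on the hypervisor's
# GetInstanceConsole implementation; roughly, an SSH-based console carries the
# command to run, while a VNC-style console carries the host/port to attach to.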
10385 class LUInstanceReplaceDisks(LogicalUnit):
10386 """Replace the disks of an instance.
10389 HPATH = "mirrors-replace"
10390 HTYPE = constants.HTYPE_INSTANCE
10393 def CheckArguments(self):
10394 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10395 self.op.iallocator)
10397 def ExpandNames(self):
10398 self._ExpandAndLockInstance()
10400 assert locking.LEVEL_NODE not in self.needed_locks
10401 assert locking.LEVEL_NODE_RES not in self.needed_locks
10402 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10404 assert self.op.iallocator is None or self.op.remote_node is None, \
10405 "Conflicting options"
10407 if self.op.remote_node is not None:
10408 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10410 # Warning: do not remove the locking of the new secondary here
10411 # unless DRBD8.AddChildren is changed to work in parallel;
10412 # currently it doesn't since parallel invocations of
10413 # FindUnusedMinor will conflict
10414 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10417 self.needed_locks[locking.LEVEL_NODE] = []
10418 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10420 if self.op.iallocator is not None:
10421 # iallocator will select a new node in the same group
10422 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10424 self.needed_locks[locking.LEVEL_NODE_RES] = []
10426 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10427 self.op.iallocator, self.op.remote_node,
10428 self.op.disks, False, self.op.early_release,
10429 self.op.ignore_ipolicy)
10431 self.tasklets = [self.replacer]
10433 def DeclareLocks(self, level):
10434 if level == locking.LEVEL_NODEGROUP:
10435 assert self.op.remote_node is None
10436 assert self.op.iallocator is not None
10437 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10439 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10440 # Lock all groups used by instance optimistically; this requires going
10441 # via the node before it's locked, requiring verification later on
10442 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10443 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10445 elif level == locking.LEVEL_NODE:
10446 if self.op.iallocator is not None:
10447 assert self.op.remote_node is None
10448 assert not self.needed_locks[locking.LEVEL_NODE]
10450 # Lock member nodes of all locked groups
10451 self.needed_locks[locking.LEVEL_NODE] = [node_name
10452 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10453 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10455 self._LockInstancesNodes()
10456 elif level == locking.LEVEL_NODE_RES:
10458 self.needed_locks[locking.LEVEL_NODE_RES] = \
10459 self.needed_locks[locking.LEVEL_NODE]
10461 def BuildHooksEnv(self):
10462 """Build hooks env.
10464 This runs on the master, the primary and all the secondaries.
10467 instance = self.replacer.instance
env = {
10469 "MODE": self.op.mode,
10470 "NEW_SECONDARY": self.op.remote_node,
10471 "OLD_SECONDARY": instance.secondary_nodes[0],
}
10473 env.update(_BuildInstanceHookEnvByObject(self, instance))
return env
10476 def BuildHooksNodes(self):
10477 """Build hooks nodes.
10480 instance = self.replacer.instance
nl = [
10482 self.cfg.GetMasterNode(),
10483 instance.primary_node,
]
10485 if self.op.remote_node is not None:
10486 nl.append(self.op.remote_node)
return nl, nl
10489 def CheckPrereq(self):
10490 """Check prerequisites.
10493 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10494 self.op.iallocator is None)
10496 # Verify if node group locks are still correct
10497 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10499 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10501 return LogicalUnit.CheckPrereq(self)
10504 class TLReplaceDisks(Tasklet):
10505 """Replaces disks for an instance.
10507 Note: Locking is not within the scope of this class.
10510 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10511 disks, delay_iallocator, early_release, ignore_ipolicy):
10512 """Initializes this class.
10515 Tasklet.__init__(self, lu)
10518 self.instance_name = instance_name
10520 self.iallocator_name = iallocator_name
10521 self.remote_node = remote_node
10523 self.delay_iallocator = delay_iallocator
10524 self.early_release = early_release
10525 self.ignore_ipolicy = ignore_ipolicy
10528 self.instance = None
10529 self.new_node = None
10530 self.target_node = None
10531 self.other_node = None
10532 self.remote_node_info = None
10533 self.node_secondary_ip = None
10536 def CheckArguments(mode, remote_node, iallocator):
10537 """Helper function for users of this class.
10540 # check for valid parameter combination
10541 if mode == constants.REPLACE_DISK_CHG:
10542 if remote_node is None and iallocator is None:
10543 raise errors.OpPrereqError("When changing the secondary either an"
10544 " iallocator script must be used or the"
10545 " new node given", errors.ECODE_INVAL)
10547 if remote_node is not None and iallocator is not None:
10548 raise errors.OpPrereqError("Give either the iallocator or the new"
10549 " secondary, not both", errors.ECODE_INVAL)
10551 elif remote_node is not None or iallocator is not None:
10552 # Not replacing the secondary
10553 raise errors.OpPrereqError("The iallocator and new node options can"
10554 " only be used when changing the"
10555 " secondary node", errors.ECODE_INVAL)
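# For illustration, the combinations accepted above are:
#   mode in (REPLACE_DISK_PRI, REPLACE_DISK_SEC, REPLACE_DISK_AUTO):
#       neither remote_node nor iallocator may be given
#   mode == REPLACE_DISK_CHG:
#       exactly one of remote_node / iallocator must be given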
10558 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10559 """Compute a new secondary node using an IAllocator.
10562 ial = IAllocator(lu.cfg, lu.rpc,
10563 mode=constants.IALLOCATOR_MODE_RELOC,
10564 name=instance_name,
10565 relocate_from=list(relocate_from))
10567 ial.Run(iallocator_name)
10569 if not ial.success:
10570 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10571 " %s" % (iallocator_name, ial.info),
10572 errors.ECODE_NORES)
10574 if len(ial.result) != ial.required_nodes:
10575 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10576 " of nodes (%s), required %s" %
10578 len(ial.result), ial.required_nodes),
10579 errors.ECODE_FAULT)
10581 remote_node_name = ial.result[0]
10583 lu.LogInfo("Selected new secondary for instance '%s': %s",
10584 instance_name, remote_node_name)
10586 return remote_node_name
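# For illustration: in relocate mode the allocator returns a single node name,
# e.g. ["node4"], which becomes the new secondary for the instance.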
10588 def _FindFaultyDisks(self, node_name):
10589 """Wrapper for L{_FindFaultyInstanceDisks}.
10592 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10595 def _CheckDisksActivated(self, instance):
10596 """Checks if the instance disks are activated.
10598 @param instance: The instance to check disks
10599 @return: True if they are activated, False otherwise
10602 nodes = instance.all_nodes
10604 for idx, dev in enumerate(instance.disks):
10606 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10607 self.cfg.SetDiskID(dev, node)
10609 result = _BlockdevFind(self, node, dev, instance)
if result.offline:
continue
10613 elif result.fail_msg or not result.payload:
return False
return True
10618 def CheckPrereq(self):
10619 """Check prerequisites.
10621 This checks that the instance is in the cluster.
10624 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10625 assert instance is not None, \
10626 "Cannot retrieve locked instance %s" % self.instance_name
10628 if instance.disk_template != constants.DT_DRBD8:
10629 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10630 " instances", errors.ECODE_INVAL)
10632 if len(instance.secondary_nodes) != 1:
10633 raise errors.OpPrereqError("The instance has a strange layout,"
10634 " expected one secondary but found %d" %
10635 len(instance.secondary_nodes),
10636 errors.ECODE_FAULT)
10638 if not self.delay_iallocator:
10639 self._CheckPrereq2()
10641 def _CheckPrereq2(self):
10642 """Check prerequisites, second part.
10644 This function should always be part of CheckPrereq. It was separated and is
10645 now called from Exec because during node evacuation iallocator was only
10646 called with an unmodified cluster model, not taking planned changes into
account.
10650 instance = self.instance
10651 secondary_node = instance.secondary_nodes[0]
10653 if self.iallocator_name is None:
10654 remote_node = self.remote_node
10656 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10657 instance.name, instance.secondary_nodes)
10659 if remote_node is None:
10660 self.remote_node_info = None
10662 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10663 "Remote node '%s' is not locked" % remote_node
10665 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10666 assert self.remote_node_info is not None, \
10667 "Cannot retrieve locked node %s" % remote_node
10669 if remote_node == self.instance.primary_node:
10670 raise errors.OpPrereqError("The specified node is the primary node of"
10671 " the instance", errors.ECODE_INVAL)
10673 if remote_node == secondary_node:
10674 raise errors.OpPrereqError("The specified node is already the"
10675 " secondary node of the instance",
10676 errors.ECODE_INVAL)
10678 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10679 constants.REPLACE_DISK_CHG):
10680 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10681 errors.ECODE_INVAL)
10683 if self.mode == constants.REPLACE_DISK_AUTO:
10684 if not self._CheckDisksActivated(instance):
10685 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10686 " first" % self.instance_name,
10687 errors.ECODE_STATE)
10688 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10689 faulty_secondary = self._FindFaultyDisks(secondary_node)
10691 if faulty_primary and faulty_secondary:
10692 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10693 " one node and can not be repaired"
10694 " automatically" % self.instance_name,
10695 errors.ECODE_STATE)
if faulty_primary:
10698 self.disks = faulty_primary
10699 self.target_node = instance.primary_node
10700 self.other_node = secondary_node
10701 check_nodes = [self.target_node, self.other_node]
10702 elif faulty_secondary:
10703 self.disks = faulty_secondary
10704 self.target_node = secondary_node
10705 self.other_node = instance.primary_node
10706 check_nodes = [self.target_node, self.other_node]
10712 # Non-automatic modes
10713 if self.mode == constants.REPLACE_DISK_PRI:
10714 self.target_node = instance.primary_node
10715 self.other_node = secondary_node
10716 check_nodes = [self.target_node, self.other_node]
10718 elif self.mode == constants.REPLACE_DISK_SEC:
10719 self.target_node = secondary_node
10720 self.other_node = instance.primary_node
10721 check_nodes = [self.target_node, self.other_node]
10723 elif self.mode == constants.REPLACE_DISK_CHG:
10724 self.new_node = remote_node
10725 self.other_node = instance.primary_node
10726 self.target_node = secondary_node
10727 check_nodes = [self.new_node, self.other_node]
10729 _CheckNodeNotDrained(self.lu, remote_node)
10730 _CheckNodeVmCapable(self.lu, remote_node)
10732 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10733 assert old_node_info is not None
10734 if old_node_info.offline and not self.early_release:
10735 # doesn't make sense to delay the release
10736 self.early_release = True
10737 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10738 " early-release mode", secondary_node)
else:
10741 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
10744 # If not specified all disks should be replaced
if not self.disks:
10746 self.disks = range(len(self.instance.disks))
10748 # TODO: This is ugly, but right now we can't distinguish between an
10749 # internally submitted opcode and an external one. We should fix that.
10750 if self.remote_node_info:
10751 # We change the node, lets verify it still meets instance policy
10752 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10753 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10755 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10756 ignore=self.ignore_ipolicy)
10758 for node in check_nodes:
10759 _CheckNodeOnline(self.lu, node)
10761 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10764 if node_name is not None)
10766 # Release unneeded node and node resource locks
10767 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10768 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10770 # Release any owned node group
10771 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10772 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10774 # Check whether disks are valid
10775 for disk_idx in self.disks:
10776 instance.FindDisk(disk_idx)
10778 # Get secondary node IP addresses
10779 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10780 in self.cfg.GetMultiNodeInfo(touched_nodes))
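# For illustration (addresses invented): node_secondary_ip ends up as a
# mapping like {"node1": "192.0.2.1", "node2": "192.0.2.2"}, later used when
# addressing the DRBD endpoints over the secondary network.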
10782 def Exec(self, feedback_fn):
10783 """Execute disk replacement.
10785 This dispatches the disk replacement to the appropriate handler.
10788 if self.delay_iallocator:
10789 self._CheckPrereq2()
10792 # Verify owned locks before starting operation
10793 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10794 assert set(owned_nodes) == set(self.node_secondary_ip), \
10795 ("Incorrect node locks, owning %s, expected %s" %
10796 (owned_nodes, self.node_secondary_ip.keys()))
10797 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10798 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10800 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10801 assert list(owned_instances) == [self.instance_name], \
10802 "Instance '%s' not locked" % self.instance_name
10804 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10805 "Should not own any node group lock at this point"
if not self.disks:
10808 feedback_fn("No disks need replacement")
return
10811 feedback_fn("Replacing disk(s) %s for %s" %
10812 (utils.CommaJoin(self.disks), self.instance.name))
10814 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10816 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
10818 _StartInstanceDisks(self.lu, self.instance, True)
try:
10821 # Should we replace the secondary node?
10822 if self.new_node is not None:
10823 fn = self._ExecDrbd8Secondary
else:
10825 fn = self._ExecDrbd8DiskOnly
10827 result = fn(feedback_fn)
finally:
10829 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
10832 _SafeShutdownInstanceDisks(self.lu, self.instance)
10834 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10837 # Verify owned locks
10838 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10839 nodes = frozenset(self.node_secondary_ip)
10840 assert ((self.early_release and not owned_nodes) or
10841 (not self.early_release and not (set(owned_nodes) - nodes))), \
10842 ("Not owning the correct locks, early_release=%s, owned=%r,"
10843 " nodes=%r" % (self.early_release, owned_nodes, nodes))
return result
10847 def _CheckVolumeGroup(self, nodes):
10848 self.lu.LogInfo("Checking volume groups")
10850 vgname = self.cfg.GetVGName()
10852 # Make sure volume group exists on all involved nodes
10853 results = self.rpc.call_vg_list(nodes)
if not results:
10855 raise errors.OpExecError("Can't list volume groups on the nodes")
for node in nodes:
10858 res = results[node]
10859 res.Raise("Error checking node %s" % node)
10860 if vgname not in res.payload:
10861 raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
10864 def _CheckDisksExistence(self, nodes):
10865 # Check disk existence
10866 for idx, dev in enumerate(self.instance.disks):
10867 if idx not in self.disks:
continue
for node in nodes:
10871 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10872 self.cfg.SetDiskID(dev, node)
10874 result = _BlockdevFind(self, node, dev, self.instance)
10876 msg = result.fail_msg
10877 if msg or not result.payload:
if not msg:
10879 msg = "disk not found"
10880 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
10883 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10884 for idx, dev in enumerate(self.instance.disks):
10885 if idx not in self.disks:
continue
10888 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
10891 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10892 on_primary, ldisk=ldisk):
10893 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10894 " replace disks for instance %s" %
10895 (node_name, self.instance.name))
10897 def _CreateNewStorage(self, node_name):
10898 """Create new storage on the primary or secondary node.
10900 This is only used for same-node replaces, not for changing the
10901 secondary node, hence we don't want to modify the existing disk.
iv_names = {}
10906 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10907 for idx, dev in enumerate(disks):
10908 if idx not in self.disks:
continue
10911 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10913 self.cfg.SetDiskID(dev, node_name)
10915 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10916 names = _GenerateUniqueNames(self.lu, lv_names)
10918 (data_disk, meta_disk) = dev.children
10919 vg_data = data_disk.logical_id[0]
10920 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10921 logical_id=(vg_data, names[0]),
10922 params=data_disk.params)
10923 vg_meta = meta_disk.logical_id[0]
10924 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10925 logical_id=(vg_meta, names[1]),
10926 params=meta_disk.params)
10928 new_lvs = [lv_data, lv_meta]
10929 old_lvs = [child.Copy() for child in dev.children]
10930 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
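# iv_names thus maps the DRBD volume name (e.g. "disk/0") to a
# (drbd_device, old_lvs, new_lvs) tuple used by the later rename and
# cleanup steps.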
10932 # we pass force_create=True to force the LVM creation
10933 for new_lv in new_lvs:
10934 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10935 _GetInstanceInfoText(self.instance), False)
return iv_names
10939 def _CheckDevices(self, node_name, iv_names):
10940 for name, (dev, _, _) in iv_names.iteritems():
10941 self.cfg.SetDiskID(dev, node_name)
10943 result = _BlockdevFind(self, node_name, dev, self.instance)
10945 msg = result.fail_msg
10946 if msg or not result.payload:
if not msg:
10948 msg = "disk not found"
10949 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
10952 if result.payload.is_degraded:
10953 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10955 def _RemoveOldStorage(self, node_name, iv_names):
10956 for name, (_, old_lvs, _) in iv_names.iteritems():
10957 self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
10960 self.cfg.SetDiskID(lv, node_name)
10962 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
10964 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10965 hint="remove unused LVs manually")
10967 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10968 """Replace a disk on the primary or secondary for DRBD 8.
10970 The algorithm for replace is quite complicated:
10972 1. for each disk to be replaced:
10974 1. create new LVs on the target node with unique names
10975 1. detach old LVs from the drbd device
10976 1. rename old LVs to name_replaced.<time_t>
10977 1. rename new LVs to old LVs
10978 1. attach the new LVs (with the old names now) to the drbd device
10980 1. wait for sync across all devices
10982 1. for each modified disk:
10984 1. remove old LVs (which have the name name_replaced.<time_t>)
10986 Failures are not very well handled.
steps_total = 6
10991 # Step: check device activation
10992 self.lu.LogStep(1, steps_total, "Check device existence")
10993 self._CheckDisksExistence([self.other_node, self.target_node])
10994 self._CheckVolumeGroup([self.target_node, self.other_node])
10996 # Step: check other node consistency
10997 self.lu.LogStep(2, steps_total, "Check peer consistency")
10998 self._CheckDisksConsistency(self.other_node,
10999 self.other_node == self.instance.primary_node,
11002 # Step: create new storage
11003 self.lu.LogStep(3, steps_total, "Allocate new storage")
11004 iv_names = self._CreateNewStorage(self.target_node)
11006 # Step: for each lv, detach+rename*2+attach
11007 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11008 for dev, old_lvs, new_lvs in iv_names.itervalues():
11009 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11011 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11013 result.Raise("Can't detach drbd from local storage on node"
11014 " %s for device %s" % (self.target_node, dev.iv_name))
11016 #cfg.Update(instance)
11018 # ok, we created the new LVs, so now we know we have the needed
11019 # storage; as such, we proceed on the target node to rename
11020 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11021 # using the assumption that logical_id == physical_id (which in
11022 # turn is the unique_id on that node)
11024 # FIXME(iustin): use a better name for the replaced LVs
11025 temp_suffix = int(time.time())
11026 ren_fn = lambda d, suff: (d.physical_id[0],
11027 d.physical_id[1] + "_replaced-%s" % suff)
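# illustrative only: an LV with physical_id ("xenvg", "disk0_data") and
# suffix 1400000000 would be renamed by ren_fn to
# ("xenvg", "disk0_data_replaced-1400000000")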
11029 # Build the rename list based on what LVs exist on the node
11030 rename_old_to_new = []
11031 for to_ren in old_lvs:
11032 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11033 if not result.fail_msg and result.payload:
11035 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11037 self.lu.LogInfo("Renaming the old LVs on the target node")
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_old_to_new)
11040 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11042 # Now we rename the new LVs to the old LVs
11043 self.lu.LogInfo("Renaming the new LVs on the target node")
11044 rename_new_to_old = [(new, old.physical_id)
11045 for old, new in zip(old_lvs, new_lvs)]
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_new_to_old)
11048 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11050 # Intermediate steps of in memory modifications
11051 for old, new in zip(old_lvs, new_lvs):
11052 new.logical_id = old.logical_id
11053 self.cfg.SetDiskID(new, self.target_node)
11055 # We need to modify old_lvs so that removal later removes the
# right LVs, not the newly added ones; note that old_lvs is a
# copy (built via Copy() in _CreateNewStorage above)
11058 for disk in old_lvs:
11059 disk.logical_id = ren_fn(disk, temp_suffix)
11060 self.cfg.SetDiskID(disk, self.target_node)
11062 # Now that the new lvs have the old name, we can add them to the device
11063 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11064 result = self.rpc.call_blockdev_addchildren(self.target_node,
11065 (dev, self.instance), new_lvs)
11066 msg = result.fail_msg
11068 for new_lv in new_lvs:
11069 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11072 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11073 hint=("cleanup manually the unused logical"
11075 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11077 cstep = itertools.count(5)
11079 if self.early_release:
11080 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11081 self._RemoveOldStorage(self.target_node, iv_names)
11082 # TODO: Check if releasing locks early still makes sense
11083 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11085 # Release all resource locks except those used by the instance
11086 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11087 keep=self.node_secondary_ip.keys())
11089 # Release all node locks while waiting for sync
11090 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11092 # TODO: Can the instance lock be downgraded here? Take the optional disk
11093 # shutdown in the caller into consideration.
11096 # This can fail as the old devices are degraded and _WaitForSync
11097 # does a combined result over all disks, so we don't check its return value
11098 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11099 _WaitForSync(self.lu, self.instance)
11101 # Check all devices manually
11102 self._CheckDevices(self.instance.primary_node, iv_names)
11104 # Step: remove old storage
11105 if not self.early_release:
11106 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11107 self._RemoveOldStorage(self.target_node, iv_names)
11109 def _ExecDrbd8Secondary(self, feedback_fn):
11110 """Replace the secondary node for DRBD 8.
11112 The algorithm for replace is quite complicated:
11113 - for all disks of the instance:
11114 - create new LVs on the new node with same names
11115 - shutdown the drbd device on the old secondary
11116 - disconnect the drbd network on the primary
11117 - create the drbd device on the new secondary
11118 - network attach the drbd on the primary, using an artifice:
11119 the drbd code for Attach() will connect to the network if it
11120 finds a device which is connected to the good local disks but
11121 not network enabled
11122 - wait for sync across all devices
11123 - remove all disks from the old secondary
11125 Failures are not very well handled.
steps_total = 6

pnode = self.instance.primary_node
11132 # Step: check device activation
11133 self.lu.LogStep(1, steps_total, "Check device existence")
11134 self._CheckDisksExistence([self.instance.primary_node])
11135 self._CheckVolumeGroup([self.instance.primary_node])
11137 # Step: check other node consistency
11138 self.lu.LogStep(2, steps_total, "Check peer consistency")
11139 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11141 # Step: create new storage
11142 self.lu.LogStep(3, steps_total, "Allocate new storage")
11143 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11144 for idx, dev in enumerate(disks):
11145 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11146 (self.new_node, idx))
11147 # we pass force_create=True to force LVM creation
11148 for new_lv in dev.children:
11149 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11150 True, _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
11153 # after this, we must manually remove the drbd minors on both the
11154 # error and the success paths
11155 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11156 minors = self.cfg.AllocateDRBDMinor([self.new_node
11157 for dev in self.instance.disks],
11158 self.instance.name)
11159 logging.debug("Allocated minors %r", minors)
11162 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11163 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11164 (self.new_node, idx))
11165 # create new devices on new_node; note that we create two IDs:
11166 # one without port, so the drbd will be activated without
11167 # networking information on the new node at this stage, and one
# with network, for the later activation in step 4
11169 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
  p_minor = o_minor1
else:
  assert self.instance.primary_node == o_node2, "Three-node instance?"
  p_minor = o_minor2
11176 new_alone_id = (self.instance.primary_node, self.new_node, None,
11177 p_minor, new_minor, o_secret)
11178 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11179 p_minor, new_minor, o_secret)
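# a DRBD8 logical_id is the 6-tuple (nodeA, nodeB, port, minorA, minorB,
# secret); new_alone_id carries port=None so the device is first brought
# up standalone on the new node, while new_net_id keeps the port and is
# written to the configuration once the old secondary is dropped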
11181 iv_names[idx] = (dev, dev.children, new_net_id)
11182 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11184 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11185 logical_id=new_alone_id,
11186 children=dev.children,
11189 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11192 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11194 _GetInstanceInfoText(self.instance), False)
11195 except errors.GenericError:
11196 self.cfg.ReleaseDRBDMinors(self.instance.name)
11199 # We have new devices, shutdown the drbd on the old secondary
11200 for idx, dev in enumerate(self.instance.disks):
11201 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11202 self.cfg.SetDiskID(dev, self.target_node)
11203 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11204 (dev, self.instance)).fail_msg
11206 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11207 "node: %s" % (idx, msg),
11208 hint=("Please cleanup this device manually as"
11209 " soon as possible"))
11211 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11212 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11213 self.instance.disks)[pnode]
11215 msg = result.fail_msg
11217 # detaches didn't succeed (unlikely)
11218 self.cfg.ReleaseDRBDMinors(self.instance.name)
11219 raise errors.OpExecError("Can't detach the disks from the network on"
11220 " old node: %s" % (msg,))
11222 # if we managed to detach at least one, we update all the disks of
11223 # the instance to point to the new secondary
11224 self.lu.LogInfo("Updating instance configuration")
11225 for dev, _, new_logical_id in iv_names.itervalues():
11226 dev.logical_id = new_logical_id
11227 self.cfg.SetDiskID(dev, self.instance.primary_node)
11229 self.cfg.Update(self.instance, feedback_fn)
11231 # Release all node locks (the configuration has been updated)
11232 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11234 # and now perform the drbd attach
11235 self.lu.LogInfo("Attaching primary drbds to new secondary"
11236 " (standalone => connected)")
11237 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11239 self.node_secondary_ip,
11240 (self.instance.disks, self.instance),
11241 self.instance.name,
11243 for to_node, to_result in result.items():
11244 msg = to_result.fail_msg
11246 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11248 hint=("please do a gnt-instance info to see the"
11249 " status of disks"))
11251 cstep = itertools.count(5)
11253 if self.early_release:
11254 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11255 self._RemoveOldStorage(self.target_node, iv_names)
11256 # TODO: Check if releasing locks early still makes sense
11257 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11259 # Release all resource locks except those used by the instance
11260 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11261 keep=self.node_secondary_ip.keys())
11263 # TODO: Can the instance lock be downgraded here? Take the optional disk
11264 # shutdown in the caller into consideration.
11267 # This can fail as the old devices are degraded and _WaitForSync
11268 # does a combined result over all disks, so we don't check its return value
11269 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11270 _WaitForSync(self.lu, self.instance)
11272 # Check all devices manually
11273 self._CheckDevices(self.instance.primary_node, iv_names)
11275 # Step: remove old storage
11276 if not self.early_release:
11277 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11278 self._RemoveOldStorage(self.target_node, iv_names)
11281 class LURepairNodeStorage(NoHooksLU):
11282 """Repairs the volume group on a node.
11287 def CheckArguments(self):
11288 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11290 storage_type = self.op.storage_type
11292 if (constants.SO_FIX_CONSISTENCY not in
11293 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11294 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11295 " repaired" % storage_type,
11296 errors.ECODE_INVAL)
11298 def ExpandNames(self):
11299 self.needed_locks = {
11300 locking.LEVEL_NODE: [self.op.node_name],
11303 def _CheckFaultyDisks(self, instance, node_name):
11304 """Ensure faulty disks abort the opcode or at least warn."""
11306 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11308 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11309 " node '%s'" % (instance.name, node_name),
11310 errors.ECODE_STATE)
11311 except errors.OpPrereqError, err:
11312 if self.op.ignore_consistency:
11313 self.proc.LogWarning(str(err.args[0]))
11317 def CheckPrereq(self):
11318 """Check prerequisites.
11321 # Check whether any instance on this node has faulty disks
11322 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11323 if inst.admin_state != constants.ADMINST_UP:
11325 check_nodes = set(inst.all_nodes)
11326 check_nodes.discard(self.op.node_name)
11327 for inst_node_name in check_nodes:
11328 self._CheckFaultyDisks(inst, inst_node_name)
11330 def Exec(self, feedback_fn):
11331 feedback_fn("Repairing storage unit '%s' on %s ..." %
11332 (self.op.name, self.op.node_name))
11334 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11335 result = self.rpc.call_storage_execute(self.op.node_name,
11336 self.op.storage_type, st_args,
11338 constants.SO_FIX_CONSISTENCY)
11339 result.Raise("Failed to repair storage unit '%s' on %s" %
11340 (self.op.name, self.op.node_name))
11343 class LUNodeEvacuate(NoHooksLU):
11344 """Evacuates instances off a list of nodes.
11349 _MODE2IALLOCATOR = {
11350 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11351 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11352 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11354 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11355 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11356 constants.IALLOCATOR_NEVAC_MODES)
11358 def CheckArguments(self):
11359 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11361 def ExpandNames(self):
11362 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11364 if self.op.remote_node is not None:
11365 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11366 assert self.op.remote_node
11368 if self.op.remote_node == self.op.node_name:
11369 raise errors.OpPrereqError("Can not use evacuated node as a new"
11370 " secondary node", errors.ECODE_INVAL)
11372 if self.op.mode != constants.NODE_EVAC_SEC:
11373 raise errors.OpPrereqError("Without the use of an iallocator only"
11374 " secondary instances can be evacuated",
11375 errors.ECODE_INVAL)
11378 self.share_locks = _ShareAll()
11379 self.needed_locks = {
11380 locking.LEVEL_INSTANCE: [],
11381 locking.LEVEL_NODEGROUP: [],
11382 locking.LEVEL_NODE: [],
11385 # Determine nodes (via group) optimistically, needs verification once locks
11386 # have been acquired
11387 self.lock_nodes = self._DetermineNodes()
11389 def _DetermineNodes(self):
11390 """Gets the list of nodes to operate on.
11393 if self.op.remote_node is None:
11394 # Iallocator will choose any node(s) in the same group
11395 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11397 group_nodes = frozenset([self.op.remote_node])
11399 # Determine nodes to be locked
11400 return set([self.op.node_name]) | group_nodes
11402 def _DetermineInstances(self):
11403 """Builds list of instances to operate on.
11406 assert self.op.mode in constants.NODE_EVAC_MODES
11408 if self.op.mode == constants.NODE_EVAC_PRI:
11409 # Primary instances only
11410 inst_fn = _GetNodePrimaryInstances
11411 assert self.op.remote_node is None, \
11412 "Evacuating primary instances requires iallocator"
11413 elif self.op.mode == constants.NODE_EVAC_SEC:
11414 # Secondary instances only
11415 inst_fn = _GetNodeSecondaryInstances
11418 assert self.op.mode == constants.NODE_EVAC_ALL
11419 inst_fn = _GetNodeInstances
11420 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11422 raise errors.OpPrereqError("Due to an issue with the iallocator"
11423 " interface it is not possible to evacuate"
11424 " all instances at once; specify explicitly"
11425 " whether to evacuate primary or secondary"
11427 errors.ECODE_INVAL)
11429 return inst_fn(self.cfg, self.op.node_name)
11431 def DeclareLocks(self, level):
11432 if level == locking.LEVEL_INSTANCE:
11433 # Lock instances optimistically, needs verification once node and group
11434 # locks have been acquired
11435 self.needed_locks[locking.LEVEL_INSTANCE] = \
11436 set(i.name for i in self._DetermineInstances())
11438 elif level == locking.LEVEL_NODEGROUP:
11439 # Lock node groups for all potential target nodes optimistically, needs
11440 # verification once nodes have been acquired
11441 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11442 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11444 elif level == locking.LEVEL_NODE:
11445 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11447 def CheckPrereq(self):
11449 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11450 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11451 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11453 need_nodes = self._DetermineNodes()
11455 if not owned_nodes.issuperset(need_nodes):
11456 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11457 " locks were acquired, current nodes are"
11458 " are '%s', used to be '%s'; retry the"
11460 (self.op.node_name,
11461 utils.CommaJoin(need_nodes),
11462 utils.CommaJoin(owned_nodes)),
11463 errors.ECODE_STATE)
11465 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11466 if owned_groups != wanted_groups:
11467 raise errors.OpExecError("Node groups changed since locks were acquired,"
11468 " current groups are '%s', used to be '%s';"
11469 " retry the operation" %
11470 (utils.CommaJoin(wanted_groups),
11471 utils.CommaJoin(owned_groups)))
11473 # Determine affected instances
11474 self.instances = self._DetermineInstances()
11475 self.instance_names = [i.name for i in self.instances]
11477 if set(self.instance_names) != owned_instances:
11478 raise errors.OpExecError("Instances on node '%s' changed since locks"
11479 " were acquired, current instances are '%s',"
11480 " used to be '%s'; retry the operation" %
11481 (self.op.node_name,
11482 utils.CommaJoin(self.instance_names),
11483 utils.CommaJoin(owned_instances)))
11485 if self.instance_names:
11486 self.LogInfo("Evacuating instances from node '%s': %s",
11488 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11490 self.LogInfo("No instances to evacuate from node '%s'",
11493 if self.op.remote_node is not None:
11494 for i in self.instances:
11495 if i.primary_node == self.op.remote_node:
11496 raise errors.OpPrereqError("Node %s is the primary node of"
11497 " instance %s, cannot use it as"
11499 (self.op.remote_node, i.name),
11500 errors.ECODE_INVAL)
11502 def Exec(self, feedback_fn):
11503 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11505 if not self.instance_names:
11506 # No instances to evacuate
11509 elif self.op.iallocator is not None:
11510 # TODO: Implement relocation to other group
11511 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11512 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11513 instances=list(self.instance_names))
11515 ial.Run(self.op.iallocator)
11517 if not ial.success:
11518 raise errors.OpPrereqError("Can't compute node evacuation using"
11519 " iallocator '%s': %s" %
11520 (self.op.iallocator, ial.info),
11521 errors.ECODE_NORES)
11523 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11525 elif self.op.remote_node is not None:
11526 assert self.op.mode == constants.NODE_EVAC_SEC
11528 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11529 remote_node=self.op.remote_node,
11531 mode=constants.REPLACE_DISK_CHG,
11532 early_release=self.op.early_release)]
11533 for instance_name in self.instance_names
11537 raise errors.ProgrammerError("No iallocator or remote node")
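# jobs is a list of jobs, each of which is a list of opcodes: the
# iallocator branch returns whatever _LoadNodeEvacResult unpacked, the
# remote_node branch one single-opcode OpInstanceReplaceDisks job per
# evacuated instance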
11539 return ResultWithJobs(jobs)
11542 def _SetOpEarlyRelease(early_release, op):
11543 """Sets C{early_release} flag on opcodes if available.
try:
  op.early_release = early_release
except AttributeError:
  assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

return op
11554 def _NodeEvacDest(use_nodes, group, nodes):
11555 """Returns group or nodes depending on caller's choice.
if use_nodes:
  return utils.CommaJoin(nodes)
else:
  return group
11564 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11565 """Unpacks the result of change-group and node-evacuate iallocator requests.
11567 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11568 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11570 @type lu: L{LogicalUnit}
11571 @param lu: Logical unit instance
11572 @type alloc_result: tuple/list
11573 @param alloc_result: Result from iallocator
11574 @type early_release: bool
11575 @param early_release: Whether to release locks early if possible
11576 @type use_nodes: bool
11577 @param use_nodes: Whether to display node names instead of groups
11580 (moved, failed, jobs) = alloc_result
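# roughly: moved is a list of (instance_name, group, [node, ...]) tuples,
# failed a list of (instance_name, reason) tuples and jobs a list of
# per-job lists of serialized opcodes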
11583 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11584 for (name, reason) in failed)
11585 lu.LogWarning("Unable to evacuate instances %s", failreason)
11586 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11589 lu.LogInfo("Instances to be moved: %s",
11590 utils.CommaJoin("%s (to %s)" %
11591 (name, _NodeEvacDest(use_nodes, group, nodes))
11592 for (name, group, nodes) in moved))
11594 return [map(compat.partial(_SetOpEarlyRelease, early_release),
map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
11599 class LUInstanceGrowDisk(LogicalUnit):
11600 """Grow a disk of an instance.
11603 HPATH = "disk-grow"
11604 HTYPE = constants.HTYPE_INSTANCE
11607 def ExpandNames(self):
11608 self._ExpandAndLockInstance()
11609 self.needed_locks[locking.LEVEL_NODE] = []
11610 self.needed_locks[locking.LEVEL_NODE_RES] = []
11611 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11612 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11614 def DeclareLocks(self, level):
11615 if level == locking.LEVEL_NODE:
11616 self._LockInstancesNodes()
11617 elif level == locking.LEVEL_NODE_RES:
11619 self.needed_locks[locking.LEVEL_NODE_RES] = \
11620 self.needed_locks[locking.LEVEL_NODE][:]
11622 def BuildHooksEnv(self):
11623 """Build hooks env.
11625 This runs on the master, the primary and all the secondaries.
11629 "DISK": self.op.disk,
11630 "AMOUNT": self.op.amount,
11631 "ABSOLUTE": self.op.absolute,
11633 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11636 def BuildHooksNodes(self):
11637 """Build hooks nodes.
11640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11643 def CheckPrereq(self):
11644 """Check prerequisites.
11646 This checks that the instance is in the cluster.
11649 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11650 assert instance is not None, \
11651 "Cannot retrieve locked instance %s" % self.op.instance_name
11652 nodenames = list(instance.all_nodes)
11653 for node in nodenames:
11654 _CheckNodeOnline(self, node)
11656 self.instance = instance
11658 if instance.disk_template not in constants.DTS_GROWABLE:
11659 raise errors.OpPrereqError("Instance's disk layout does not support"
11660 " growing", errors.ECODE_INVAL)
11662 self.disk = instance.FindDisk(self.op.disk)
11664 if self.op.absolute:
11665 self.target = self.op.amount
11666 self.delta = self.target - self.disk.size
11668 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11669 "current disk size (%s)" %
11670 (utils.FormatUnit(self.target, "h"),
11671 utils.FormatUnit(self.disk.size, "h")),
11672 errors.ECODE_STATE)
11674 self.delta = self.op.amount
11675 self.target = self.disk.size + self.delta
11677 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11678 utils.FormatUnit(self.delta, "h"),
11679 errors.ECODE_INVAL)
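# for example (illustrative numbers): growing a 10240 MB disk with
# amount=2048 in relative mode gives delta=2048 and target=12288, while
# the same request in absolute mode would use amount=12288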
11681 if instance.disk_template not in (constants.DT_FILE,
11682 constants.DT_SHARED_FILE,
11684 # TODO: check the free disk space for file, when that feature will be
11686 _CheckNodesFreeDiskPerVG(self, nodenames,
11687 self.disk.ComputeGrowth(self.delta))
11689 def Exec(self, feedback_fn):
11690 """Execute disk grow.
instance = self.instance
disk = self.disk
11696 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11697 assert (self.owned_locks(locking.LEVEL_NODE) ==
11698 self.owned_locks(locking.LEVEL_NODE_RES))
11700 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11702 raise errors.OpExecError("Cannot activate block device to grow")
11704 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11705 (self.op.disk, instance.name,
11706 utils.FormatUnit(self.delta, "h"),
11707 utils.FormatUnit(self.target, "h")))
11709 # First run all grow ops in dry-run mode
11710 for node in instance.all_nodes:
11711 self.cfg.SetDiskID(disk, node)
11712 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11714 result.Raise("Grow request failed to node %s" % node)
11716 # We know that (as far as we can test) operations across different
11717 # nodes will succeed, time to run it for real
11718 for node in instance.all_nodes:
11719 self.cfg.SetDiskID(disk, node)
11720 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11722 result.Raise("Grow request failed to node %s" % node)
11724 # TODO: Rewrite code to work properly
11725 # DRBD goes into sync mode for a short amount of time after executing the
11726 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11727 # calling "resize" in sync mode fails. Sleeping for a short amount of
11728 # time is a work-around.
11731 disk.RecordGrow(self.delta)
11732 self.cfg.Update(instance, feedback_fn)
11734 # Changes have been recorded, release node lock
11735 _ReleaseLocks(self, locking.LEVEL_NODE)
11737 # Downgrade lock while waiting for sync
11738 self.glm.downgrade(locking.LEVEL_INSTANCE)
11740 if self.op.wait_for_sync:
11741 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11743 self.proc.LogWarning("Disk sync-ing has not returned a good"
11744 " status; please check the instance")
11745 if instance.admin_state != constants.ADMINST_UP:
11746 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11747 elif instance.admin_state != constants.ADMINST_UP:
11748 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11749 " not supposed to be running because no wait for"
11750 " sync mode was requested")
11752 assert self.owned_locks(locking.LEVEL_NODE_RES)
11753 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11756 class LUInstanceQueryData(NoHooksLU):
11757 """Query runtime instance data.
11762 def ExpandNames(self):
11763 self.needed_locks = {}
11765 # Use locking if requested or when non-static information is wanted
11766 if not (self.op.static or self.op.use_locking):
11767 self.LogWarning("Non-static data requested, locks need to be acquired")
11768 self.op.use_locking = True
11770 if self.op.instances or not self.op.use_locking:
11771 # Expand instance names right here
11772 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11774 # Will use acquired locks
11775 self.wanted_names = None
11777 if self.op.use_locking:
11778 self.share_locks = _ShareAll()
11780 if self.wanted_names is None:
11781 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11783 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11785 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11786 self.needed_locks[locking.LEVEL_NODE] = []
11787 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11789 def DeclareLocks(self, level):
11790 if self.op.use_locking:
11791 if level == locking.LEVEL_NODEGROUP:
11792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11794 # Lock all groups used by instances optimistically; this requires going
11795 # via the node before it's locked, requiring verification later on
11796 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11797 frozenset(group_uuid
11798 for instance_name in owned_instances
11800 self.cfg.GetInstanceNodeGroups(instance_name))
11802 elif level == locking.LEVEL_NODE:
11803 self._LockInstancesNodes()
11805 def CheckPrereq(self):
11806 """Check prerequisites.
11808 This only checks the optional instance list against the existing names.
11811 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11812 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11813 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11815 if self.wanted_names is None:
11816 assert self.op.use_locking, "Locking was not used"
11817 self.wanted_names = owned_instances
11819 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11821 if self.op.use_locking:
11822 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11825 assert not (owned_instances or owned_groups or owned_nodes)
11827 self.wanted_instances = instances.values()
11829 def _ComputeBlockdevStatus(self, node, instance, dev):
11830 """Returns the status of a block device
11833 if self.op.static or not node:
11836 self.cfg.SetDiskID(dev, node)
11838 result = self.rpc.call_blockdev_find(node, dev)
11842 result.Raise("Can't compute disk status for %s" % instance.name)
11844 status = result.payload
11848 return (status.dev_path, status.major, status.minor,
11849 status.sync_percent, status.estimated_time,
11850 status.is_degraded, status.ldisk_status)
11852 def _ComputeDiskStatus(self, instance, snode, dev):
11853 """Compute block device status.
11856 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11858 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11860 def _ComputeDiskStatusInner(self, instance, snode, dev):
11861 """Compute block device status.
11863 @attention: The device has to be annotated already.
11866 if dev.dev_type in constants.LDS_DRBD:
11867 # we change the snode then (otherwise we use the one passed in)
11868 if dev.logical_id[0] == instance.primary_node:
11869 snode = dev.logical_id[1]
11871 snode = dev.logical_id[0]
11873 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11875 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11878 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11885 "iv_name": dev.iv_name,
11886 "dev_type": dev.dev_type,
11887 "logical_id": dev.logical_id,
11888 "physical_id": dev.physical_id,
11889 "pstatus": dev_pstatus,
11890 "sstatus": dev_sstatus,
11891 "children": dev_children,
11896 def Exec(self, feedback_fn):
11897 """Gather and return data"""
11900 cluster = self.cfg.GetClusterInfo()
11902 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11903 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11905 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11906 for node in nodes.values()))
11908 group2name_fn = lambda uuid: groups[uuid].name
11910 for instance in self.wanted_instances:
11911 pnode = nodes[instance.primary_node]
11913 if self.op.static or pnode.offline:
11914 remote_state = None
11916 self.LogWarning("Primary node %s is marked offline, returning static"
11917 " information only for instance %s" %
11918 (pnode.name, instance.name))
11920 remote_info = self.rpc.call_instance_info(instance.primary_node,
11922 instance.hypervisor)
11923 remote_info.Raise("Error checking node %s" % instance.primary_node)
11924 remote_info = remote_info.payload
if remote_info and "state" in remote_info:
  remote_state = "up"
else:
  if instance.admin_state == constants.ADMINST_UP:
    remote_state = "down"
  else:
    remote_state = instance.admin_state
11933 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11936 snodes_group_uuids = [nodes[snode_name].group
11937 for snode_name in instance.secondary_nodes]
11939 result[instance.name] = {
11940 "name": instance.name,
11941 "config_state": instance.admin_state,
11942 "run_state": remote_state,
11943 "pnode": instance.primary_node,
11944 "pnode_group_uuid": pnode.group,
11945 "pnode_group_name": group2name_fn(pnode.group),
11946 "snodes": instance.secondary_nodes,
11947 "snodes_group_uuids": snodes_group_uuids,
11948 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11950 # this happens to be the same format used for hooks
11951 "nics": _NICListToTuple(self, instance.nics),
11952 "disk_template": instance.disk_template,
11954 "hypervisor": instance.hypervisor,
11955 "network_port": instance.network_port,
11956 "hv_instance": instance.hvparams,
11957 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11958 "be_instance": instance.beparams,
11959 "be_actual": cluster.FillBE(instance),
11960 "os_instance": instance.osparams,
11961 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11962 "serial_no": instance.serial_no,
11963 "mtime": instance.mtime,
11964 "ctime": instance.ctime,
11965 "uuid": instance.uuid,
11971 def PrepareContainerMods(mods, private_fn):
11972 """Prepares a list of container modifications by adding a private data field.
11974 @type mods: list of tuples; (operation, index, parameters)
11975 @param mods: List of modifications
11976 @type private_fn: callable or None
11977 @param private_fn: Callable for constructing a private data field for a
if private_fn is None:
  fn = lambda: None
else:
  fn = private_fn

return [(op, idx, params, fn()) for (op, idx, params) in mods]
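# illustrative result: mods [(constants.DDM_ADD, -1, {...})] prepared with
# private_fn=_InstNicModPrivate becomes
# [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate instance>)]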
11990 #: Type description for changes as returned by L{ApplyContainerMods}'s
11992 _TApplyContModsCbChanges = \
11993 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
ht.TNonEmptyString,
ht.TAny,
])))
11999 def ApplyContainerMods(kind, container, chgdesc, mods,
12000 create_fn, modify_fn, remove_fn):
12001 """Applies descriptions in C{mods} to C{container}.
12004 @param kind: One-word item description
12005 @type container: list
12006 @param container: Container to modify
12007 @type chgdesc: None or list
12008 @param chgdesc: List of applied changes
12010 @param mods: Modifications as returned by L{PrepareContainerMods}
12011 @type create_fn: callable
12012 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12013 receives absolute item index, parameters and private data object as added
12014 by L{PrepareContainerMods}, returns tuple containing new item and changes
12016 @type modify_fn: callable
12017 @param modify_fn: Callback for modifying an existing item
12018 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12019 and private data object as added by L{PrepareContainerMods}, returns
12021 @type remove_fn: callable
12022 @param remove_fn: Callback on removing item; receives absolute item index,
12023 item and private data object as added by L{PrepareContainerMods}
12026 for (op, idx, params, private) in mods:
12029 absidx = len(container) - 1
12031 raise IndexError("Not accepting negative indices other than -1")
12032 elif idx > len(container):
12033 raise IndexError("Got %s index %s, but there are only %s" %
12034 (kind, idx, len(container)))
12040 if op == constants.DDM_ADD:
12041 # Calculate where item will be added
12043 addidx = len(container)
12047 if create_fn is None:
12050 (item, changes) = create_fn(addidx, params, private)
12053 container.append(item)
12056 assert idx <= len(container)
12057 # list.insert does so before the specified index
12058 container.insert(idx, item)
12060 # Retrieve existing item
12062 item = container[absidx]
12064 raise IndexError("Invalid %s index %s" % (kind, idx))
12066 if op == constants.DDM_REMOVE:
12069 if remove_fn is not None:
12070 remove_fn(absidx, item, private)
12072 #TODO: include a hotplugged msg in changes
12073 changes = [("%s/%s" % (kind, absidx), "remove")]
12075 assert container[absidx] == item
12076 del container[absidx]
12077 elif op == constants.DDM_MODIFY:
12078 if modify_fn is not None:
12079 #TODO: include a hotplugged msg in changes
12080 changes = modify_fn(absidx, item, params, private)
12083 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12085 assert _TApplyContModsCbChanges(changes)
12087 if not (chgdesc is None or changes is None):
12088 chgdesc.extend(changes)
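# illustrative call, mirroring the NIC handling in LUInstanceSetParams
# below: ApplyContainerMods("NIC", nics, chgdesc, nicmod, create_fn,
# modify_fn, remove_fn) mutates the nics list in place and appends
# (label, value) change pairs to chgdesc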
12091 def _UpdateIvNames(base_index, disks):
12092 """Updates the C{iv_name} attribute of disks.
12094 @type disks: list of L{objects.Disk}
12097 for (idx, disk) in enumerate(disks):
12098 disk.iv_name = "disk/%s" % (base_index + idx, )
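# e.g. _UpdateIvNames(1, disks) relabels a two-element list as "disk/1"
# and "disk/2"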
12101 class _InstNicModPrivate:
12102 """Data structure for network interface modifications.
12104 Used by L{LUInstanceSetParams}.
12107 def __init__(self):
12112 class LUInstanceSetParams(LogicalUnit):
12113 """Modifies an instances's parameters.
12116 HPATH = "instance-modify"
12117 HTYPE = constants.HTYPE_INSTANCE
12121 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12122 assert ht.TList(mods)
12123 assert not mods or len(mods[0]) in (2, 3)
12125 if mods and len(mods[0]) == 2:
12129 for op, params in mods:
12130 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12131 result.append((op, -1, params))
12135 raise errors.OpPrereqError("Only one %s add or remove operation is"
12136 " supported at a time" % kind,
12137 errors.ECODE_INVAL)
12139 result.append((constants.DDM_MODIFY, op, params))
12141 assert verify_fn(result)
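# illustrative upgrade performed by _UpgradeDiskNicMods: old-style
# [("add", {...})] becomes [(constants.DDM_ADD, -1, {...})], while
# [(2, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 2, {"mode": "ro"})]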
12148 def _CheckMods(kind, mods, key_types, item_fn):
12149 """Ensures requested disk/NIC modifications are valid.
12152 for (op, _, params) in mods:
12153 assert ht.TDict(params)
12155 utils.ForceDictType(params, key_types)
12157 if op == constants.DDM_REMOVE:
12159 raise errors.OpPrereqError("No settings should be passed when"
12160 " removing a %s" % kind,
12161 errors.ECODE_INVAL)
12162 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12163 item_fn(op, params)
12165 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12168 def _VerifyDiskModification(op, params):
12169 """Verifies a disk modification.
12172 if op == constants.DDM_ADD:
12173 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12174 if mode not in constants.DISK_ACCESS_SET:
12175 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12176 errors.ECODE_INVAL)
12178 size = params.get(constants.IDISK_SIZE, None)
12180 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12181 constants.IDISK_SIZE, errors.ECODE_INVAL)
12185 except (TypeError, ValueError), err:
12186 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12187 errors.ECODE_INVAL)
12189 params[constants.IDISK_SIZE] = size
12191 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12192 raise errors.OpPrereqError("Disk size change not possible, use"
12193 " grow-disk", errors.ECODE_INVAL)
12196 def _VerifyNicModification(op, params):
12197 """Verifies a network interface modification.
12200 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12201 ip = params.get(constants.INIC_IP, None)
12204 elif ip.lower() == constants.VALUE_NONE:
12205 params[constants.INIC_IP] = None
12206 elif not netutils.IPAddress.IsValid(ip):
12207 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12208 errors.ECODE_INVAL)
12210 bridge = params.get("bridge", None)
12211 link = params.get(constants.INIC_LINK, None)
12212 if bridge and link:
12213 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12214 " at the same time", errors.ECODE_INVAL)
12215 elif bridge and bridge.lower() == constants.VALUE_NONE:
12216 params["bridge"] = None
12217 elif link and link.lower() == constants.VALUE_NONE:
12218 params[constants.INIC_LINK] = None
12220 if op == constants.DDM_ADD:
12221 macaddr = params.get(constants.INIC_MAC, None)
12222 if macaddr is None:
12223 params[constants.INIC_MAC] = constants.VALUE_AUTO
12225 if constants.INIC_MAC in params:
12226 macaddr = params[constants.INIC_MAC]
12227 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12228 macaddr = utils.NormalizeAndValidateMac(macaddr)
12230 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12231 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12232 " modifying an existing NIC",
12233 errors.ECODE_INVAL)
12235 def CheckArguments(self):
12236 if not (self.op.nics or self.op.disks or self.op.disk_template or
12237 self.op.hvparams or self.op.beparams or self.op.os_name or
12238 self.op.offline is not None or self.op.runtime_mem):
12239 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12241 if self.op.hvparams:
12242 _CheckGlobalHvParams(self.op.hvparams)
12245 self._UpgradeDiskNicMods("disk", self.op.disks,
12246 opcodes.OpInstanceSetParams.TestDiskModifications)
12248 self._UpgradeDiskNicMods("NIC", self.op.nics,
12249 opcodes.OpInstanceSetParams.TestNicModifications)
12251 # Check disk modifications
12252 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12253 self._VerifyDiskModification)
12255 if self.op.disks and self.op.disk_template is not None:
12256 raise errors.OpPrereqError("Disk template conversion and other disk"
12257 " changes not supported at the same time",
12258 errors.ECODE_INVAL)
12260 if (self.op.disk_template and
12261 self.op.disk_template in constants.DTS_INT_MIRROR and
12262 self.op.remote_node is None):
12263 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12264 " one requires specifying a secondary node",
12265 errors.ECODE_INVAL)
12267 # Check NIC modifications
12268 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12269 self._VerifyNicModification)
12271 def ExpandNames(self):
12272 self._ExpandAndLockInstance()
12273 # Can't even acquire node locks in shared mode as upcoming changes in
12274 # Ganeti 2.6 will start to modify the node object on disk conversion
12275 self.needed_locks[locking.LEVEL_NODE] = []
12276 self.needed_locks[locking.LEVEL_NODE_RES] = []
12277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12279 def DeclareLocks(self, level):
12280 # TODO: Acquire group lock in shared mode (disk parameters)
12281 if level == locking.LEVEL_NODE:
12282 self._LockInstancesNodes()
12283 if self.op.disk_template and self.op.remote_node:
12284 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12285 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12286 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12288 self.needed_locks[locking.LEVEL_NODE_RES] = \
12289 self.needed_locks[locking.LEVEL_NODE][:]
12291 def BuildHooksEnv(self):
12292 """Build hooks env.
12294 This runs on the master, primary and secondaries.
12298 if constants.BE_MINMEM in self.be_new:
12299 args["minmem"] = self.be_new[constants.BE_MINMEM]
12300 if constants.BE_MAXMEM in self.be_new:
12301 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12302 if constants.BE_VCPUS in self.be_new:
12303 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12304 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12305 # information at all.
12307 if self._new_nics is not None:
12310 for nic in self._new_nics:
12311 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12312 mode = nicparams[constants.NIC_MODE]
12313 link = nicparams[constants.NIC_LINK]
12314 nics.append((nic.ip, nic.mac, mode, link))
12316 args["nics"] = nics
12318 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12319 if self.op.disk_template:
12320 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12321 if self.op.runtime_mem:
12322 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12326 def BuildHooksNodes(self):
12327 """Build hooks nodes.
12330 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12333 def _PrepareNicModification(self, params, private, old_ip, old_params,
12335 update_params_dict = dict([(key, params[key])
12336 for key in constants.NICS_PARAMETERS
12339 if "bridge" in params:
12340 update_params_dict[constants.NIC_LINK] = params["bridge"]
12342 new_params = _GetUpdatedParams(old_params, update_params_dict)
12343 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12345 new_filled_params = cluster.SimpleFillNIC(new_params)
12346 objects.NIC.CheckParameterSyntax(new_filled_params)
12348 new_mode = new_filled_params[constants.NIC_MODE]
12349 if new_mode == constants.NIC_MODE_BRIDGED:
12350 bridge = new_filled_params[constants.NIC_LINK]
12351 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12353 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12355 self.warn.append(msg)
12357 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12359 elif new_mode == constants.NIC_MODE_ROUTED:
12360 ip = params.get(constants.INIC_IP, old_ip)
12362 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12363 " on a routed NIC", errors.ECODE_INVAL)
12365 if constants.INIC_MAC in params:
12366 mac = params[constants.INIC_MAC]
12368 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12369 errors.ECODE_INVAL)
12370 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12371 # otherwise generate the MAC address
12372 params[constants.INIC_MAC] = \
12373 self.cfg.GenerateMAC(self.proc.GetECId())
12375 # or validate/reserve the current one
12377 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12378 except errors.ReservationError:
12379 raise errors.OpPrereqError("MAC address '%s' already in use"
12380 " in cluster" % mac,
12381 errors.ECODE_NOTUNIQUE)
12383 logging.info("new_params %s", new_params)
12384 logging.info("new_filled_params %s", new_filled_params)
12385 private.params = new_params
12386 private.filled = new_filled_params
12388 def CheckPrereq(self):
12389 """Check prerequisites.
12391 This only checks the instance list against the existing names.
12394 # checking the new params on the primary/secondary nodes
12396 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12397 cluster = self.cluster = self.cfg.GetClusterInfo()
12398 assert self.instance is not None, \
12399 "Cannot retrieve locked instance %s" % self.op.instance_name
12400 pnode = instance.primary_node
12401 nodelist = list(instance.all_nodes)
12402 pnode_info = self.cfg.GetNodeInfo(pnode)
12403 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12405 # Prepare disk/NIC modifications
12406 self.diskmod = PrepareContainerMods(self.op.disks, None)
12407 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12408 logging.info("nicmod %s", self.nicmod)
12411 if self.op.os_name and not self.op.force:
12412 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12413 self.op.force_variant)
12414 instance_os = self.op.os_name
12416 instance_os = instance.os
12418 assert not (self.op.disk_template and self.op.disks), \
12419 "Can't modify disk template and apply disk changes at the same time"
12421 if self.op.disk_template:
12422 if instance.disk_template == self.op.disk_template:
12423 raise errors.OpPrereqError("Instance already has disk template %s" %
12424 instance.disk_template, errors.ECODE_INVAL)
12426 if (instance.disk_template,
12427 self.op.disk_template) not in self._DISK_CONVERSIONS:
12428 raise errors.OpPrereqError("Unsupported disk template conversion from"
12429 " %s to %s" % (instance.disk_template,
12430 self.op.disk_template),
12431 errors.ECODE_INVAL)
12432 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12433 msg="cannot change disk template")
12434 if self.op.disk_template in constants.DTS_INT_MIRROR:
12435 if self.op.remote_node == pnode:
12436 raise errors.OpPrereqError("Given new secondary node %s is the same"
12437 " as the primary node of the instance" %
12438 self.op.remote_node, errors.ECODE_STATE)
12439 _CheckNodeOnline(self, self.op.remote_node)
12440 _CheckNodeNotDrained(self, self.op.remote_node)
12441 # FIXME: here we assume that the old instance type is DT_PLAIN
12442 assert instance.disk_template == constants.DT_PLAIN
12443 disks = [{constants.IDISK_SIZE: d.size,
12444 constants.IDISK_VG: d.logical_id[0]}
12445 for d in instance.disks]
12446 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12447 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12449 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12450 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12451 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12452 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12453 ignore=self.op.ignore_ipolicy)
12454 if pnode_info.group != snode_info.group:
12455 self.LogWarning("The primary and secondary nodes are in two"
12456 " different node groups; the disk parameters"
12457 " from the first disk's node group will be"
12460 # hvparams processing
12461 if self.op.hvparams:
12462 hv_type = instance.hypervisor
12463 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12464 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12465 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12468 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12469 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12470 self.hv_proposed = self.hv_new = hv_new # the new actual values
12471 self.hv_inst = i_hvdict # the new dict (without defaults)
12473 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12475 self.hv_new = self.hv_inst = {}
12477 # beparams processing
12478 if self.op.beparams:
12479 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12481 objects.UpgradeBeParams(i_bedict)
12482 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12483 be_new = cluster.SimpleFillBE(i_bedict)
12484 self.be_proposed = self.be_new = be_new # the new actual values
12485 self.be_inst = i_bedict # the new dict (without defaults)
12487 self.be_new = self.be_inst = {}
12488 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12489 be_old = cluster.FillBE(instance)
12491 # CPU param validation -- checking every time a parameter is
12492 # changed to cover all cases where either CPU mask or vcpus have
12494 if (constants.BE_VCPUS in self.be_proposed and
12495 constants.HV_CPU_MASK in self.hv_proposed):
12497 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12498 # Verify mask is consistent with number of vCPUs. Can skip this
12499 # test if only 1 entry in the CPU mask, which means same mask
12500 # is applied to all vCPUs.
12501 if (len(cpu_list) > 1 and
12502 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12503 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12505 (self.be_proposed[constants.BE_VCPUS],
12506 self.hv_proposed[constants.HV_CPU_MASK]),
12507 errors.ECODE_INVAL)
12509 # Only perform this test if a new CPU mask is given
12510 if constants.HV_CPU_MASK in self.hv_new:
12511 # Calculate the largest CPU number requested
12512 max_requested_cpu = max(map(max, cpu_list))
12513 # Check that all of the instance's nodes have enough physical CPUs to
12514 # satisfy the requested CPU mask
12515 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12516 max_requested_cpu + 1, instance.hypervisor)
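# e.g. (illustrative, assuming the usual ':'-separated multi-CPU mask
# syntax) with BE_VCPUS=2 a mask like "0-1:2-3" passes both checks on a
# 4-CPU node, while the single-entry mask "0-3" applies to all vCPUs and
# skips the length comparison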
12518 # osparams processing
12519 if self.op.osparams:
12520 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12521 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12522 self.os_inst = i_osdict # the new dict (without defaults)
12528 #TODO(dynmem): do the appropriate check involving MINMEM
12529 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12530 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12531 mem_check_list = [pnode]
12532 if be_new[constants.BE_AUTO_BALANCE]:
12533 # either we changed auto_balance to yes or it was from before
12534 mem_check_list.extend(instance.secondary_nodes)
12535 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12536 instance.hypervisor)
12537 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12538 [instance.hypervisor])
12539 pninfo = nodeinfo[pnode]
12540 msg = pninfo.fail_msg
12542 # Assume the primary node is unreachable and go ahead
12543 self.warn.append("Can't get info from primary node %s: %s" %
12546 (_, _, (pnhvinfo, )) = pninfo.payload
12547 if not isinstance(pnhvinfo.get("memory_free", None), int):
12548 self.warn.append("Node data from primary node %s doesn't contain"
12549 " free memory information" % pnode)
12550 elif instance_info.fail_msg:
12551 self.warn.append("Can't get instance runtime information: %s" %
12552 instance_info.fail_msg)
12554 if instance_info.payload:
12555 current_mem = int(instance_info.payload["memory"])
12557 # Assume instance not running
12558 # (there is a slight race condition here, but it's not very
12559 # probable, and we have no other way to check)
12560 # TODO: Describe race condition
12562 #TODO(dynmem): do the appropriate check involving MINMEM
12563 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12564 pnhvinfo["memory_free"])
12566 raise errors.OpPrereqError("This change will prevent the instance"
12567 " from starting, due to %d MB of memory"
12568 " missing on its primary node" %
12570 errors.ECODE_NORES)
12572 if be_new[constants.BE_AUTO_BALANCE]:
12573 for node, nres in nodeinfo.items():
12574 if node not in instance.secondary_nodes:
12576 nres.Raise("Can't get info from secondary node %s" % node,
12577 prereq=True, ecode=errors.ECODE_STATE)
12578 (_, _, (nhvinfo, )) = nres.payload
12579 if not isinstance(nhvinfo.get("memory_free", None), int):
12580 raise errors.OpPrereqError("Secondary node %s didn't return free"
12581 " memory information" % node,
12582 errors.ECODE_STATE)
12583 #TODO(dynmem): do the appropriate check involving MINMEM
12584 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12585 raise errors.OpPrereqError("This change will prevent the instance"
12586 " from failover to its secondary node"
12587 " %s, due to not enough memory" % node,
12588 errors.ECODE_STATE)
12590 if self.op.runtime_mem:
12591 remote_info = self.rpc.call_instance_info(instance.primary_node,
12593 instance.hypervisor)
12594 remote_info.Raise("Error checking node %s" % instance.primary_node)
12595 if not remote_info.payload: # not running already
12596 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12597 errors.ECODE_STATE)
12599 current_memory = remote_info.payload["memory"]
12600 if (not self.op.force and
12601 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12602 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12603 raise errors.OpPrereqError("Instance %s must have memory between %d"
12604 " and %d MB of memory unless --force is"
12605 " given" % (instance.name,
12606 self.be_proposed[constants.BE_MINMEM],
12607 self.be_proposed[constants.BE_MAXMEM]),
12608 errors.ECODE_INVAL)
12610 if self.op.runtime_mem > current_memory:
12611 _CheckNodeFreeMemory(self, instance.primary_node,
12612 "ballooning memory for instance %s" %
self.op.runtime_mem - current_memory,
12615 instance.hypervisor)
12617 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12618 raise errors.OpPrereqError("Disk operations not supported for"
12619 " diskless instances",
12620 errors.ECODE_INVAL)
12622 def _PrepareNicCreate(_, params, private):
12623 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12624 return (None, None)
12626 def _PrepareNicMod(_, nic, params, private):
12627 self._PrepareNicModification(params, private, nic.ip,
12628 nic.nicparams, cluster, pnode)
12631 # Verify NIC changes (operating on copy)
12632 nics = instance.nics[:]
12633 ApplyContainerMods("NIC", nics, None, self.nicmod,
12634 _PrepareNicCreate, _PrepareNicMod, None)
12635 if len(nics) > constants.MAX_NICS:
12636 raise errors.OpPrereqError("Instance has too many network interfaces"
12637 " (%d), cannot add more" % constants.MAX_NICS,
12638 errors.ECODE_STATE)
12641 # Verify disk changes (operating on a copy)
12642 disks = instance.disks[:]
12643 ApplyContainerMods("disk", disks, None, self.diskmod,
12645 if len(disks) > constants.MAX_DISKS:
12646 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12647 " more" % constants.MAX_DISKS,
12648 errors.ECODE_STATE)
12650 if self.op.offline is not None:
12651 if self.op.offline:
12652 msg = "can't change to offline"
12654 msg = "can't change to online"
12655 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12657 # Pre-compute NIC changes (necessary to use result in hooks)
12658 self._nic_chgdesc = []
12660 # Operate on copies as this is still in prereq
12661 nics = [nic.Copy() for nic in instance.nics]
12662 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12663 self._CreateNewNic, self._ApplyNicMods,
12665 self._new_nics = nics
12667 self._new_nics = None
12670 def _ConvertPlainToDrbd(self, feedback_fn):
12671 """Converts an instance from plain to drbd.
12674 feedback_fn("Converting template to drbd")
12675 instance = self.instance
12676 pnode = instance.primary_node
12677 snode = self.op.remote_node
12679 assert instance.disk_template == constants.DT_PLAIN
12681 # create a fake disk info for _GenerateDiskTemplate
12682 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12683 constants.IDISK_VG: d.logical_id[0]}
12684 for d in instance.disks]
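# illustrative content: a single 10 GB read-write disk in volume group
# "xenvg" would yield
# [{IDISK_SIZE: 10240, IDISK_MODE: "rw", IDISK_VG: "xenvg"}]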
12685 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12686 instance.name, pnode, [snode],
12687 disk_info, None, None, 0, feedback_fn,
12689 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12691 info = _GetInstanceInfoText(instance)
12692 feedback_fn("Creating additional volumes...")
12693 # first, create the missing data and meta devices
12694 for disk in anno_disks:
12695 # unfortunately this is... not too nice
12696 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12698 for child in disk.children:
12699 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12700 # at this stage, all new LVs have been created, we can rename the
12702 feedback_fn("Renaming original volumes...")
12703 rename_list = [(o, n.children[0].logical_id)
12704 for (o, n) in zip(instance.disks, new_disks)]
12705 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12706 result.Raise("Failed to rename original LVs")
12708 feedback_fn("Initializing DRBD devices...")
12709 # all child devices are in place, we can now create the DRBD devices
12710 for disk in anno_disks:
12711 for node in [pnode, snode]:
12712 f_create = node == pnode
12713 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12715 # at this point, the instance has been modified
12716 instance.disk_template = constants.DT_DRBD8
12717 instance.disks = new_disks
12718 self.cfg.Update(instance, feedback_fn)
12720 # Release node locks while waiting for sync
12721 _ReleaseLocks(self, locking.LEVEL_NODE)
12723 # disks are created, waiting for sync
12724 disk_abort = not _WaitForSync(self, instance,
12725                                   oneshot=not self.op.wait_for_sync)
12726     if disk_abort:
12727       raise errors.OpExecError("There are some degraded disks for"
12728 " this instance, please cleanup manually")
12730 # Node resource locks will be released by caller
12732 def _ConvertDrbdToPlain(self, feedback_fn):
12733 """Converts an instance from drbd to plain.
12736 instance = self.instance
12738 assert len(instance.secondary_nodes) == 1
12739 assert instance.disk_template == constants.DT_DRBD8
12741 pnode = instance.primary_node
12742 snode = instance.secondary_nodes[0]
12743 feedback_fn("Converting template to plain")
12745 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12746 new_disks = [d.children[0] for d in instance.disks]
12748 # copy over size and mode
12749 for parent, child in zip(old_disks, new_disks):
12750 child.size = parent.size
12751 child.mode = parent.mode
12753 # this is a DRBD disk, return its port to the pool
12754 # NOTE: this must be done right before the call to cfg.Update!
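    # For DRBD8 disks the logical_id is the tuple (primary_node,
    # secondary_node, port, minor_primary, minor_secondary, secret), so
    # logical_id[2] below is the network port being handed back to the pool.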
12755 for disk in old_disks:
12756 tcp_port = disk.logical_id[2]
12757 self.cfg.AddTcpUdpPort(tcp_port)
12759 # update instance structure
12760 instance.disks = new_disks
12761 instance.disk_template = constants.DT_PLAIN
12762 self.cfg.Update(instance, feedback_fn)
12764 # Release locks in case removing disks takes a while
12765 _ReleaseLocks(self, locking.LEVEL_NODE)
12767 feedback_fn("Removing volumes on the secondary node...")
12768 for disk in old_disks:
12769 self.cfg.SetDiskID(disk, snode)
12770       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12771       if msg:
12772         self.LogWarning("Could not remove block device %s on node %s,"
12773 " continuing anyway: %s", disk.iv_name, snode, msg)
12775 feedback_fn("Removing unneeded volumes on the primary node...")
12776 for idx, disk in enumerate(old_disks):
12777 meta = disk.children[1]
12778 self.cfg.SetDiskID(meta, pnode)
12779       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12780       if msg:
12781         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12782 " continuing anyway: %s", idx, pnode, msg)
12784 def _CreateNewDisk(self, idx, params, _):
12785 """Creates a new disk.
12788 instance = self.instance
12791 if instance.disk_template in constants.DTS_FILEBASED:
12792 (file_driver, file_path) = instance.disks[0].logical_id
12793       file_path = os.path.dirname(file_path)
12794     else:
12795       file_driver = file_path = None
12797     disk = \
12798       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12799 instance.primary_node, instance.secondary_nodes,
12800 [params], file_path, file_driver, idx,
12801 self.Log, self.diskparams)[0]
12803 info = _GetInstanceInfoText(instance)
12805 logging.info("Creating volume %s for instance %s",
12806 disk.iv_name, instance.name)
12807 # Note: this needs to be kept in sync with _CreateDisks
12809 for node in instance.all_nodes:
12810       f_create = (node == instance.primary_node)
12811       try:
12812         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12813 except errors.OpExecError, err:
12814 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12815 disk.iv_name, disk, node, err)
12817 if self.op.hotplug and disk.pci:
12818 disk_ok, device_info = _AssembleInstanceDisks(self, self.instance,
12819 [disk], check=False)
12820 _, _, dev_path = device_info[0]
12821 result = self.rpc.call_hot_add_disk(self.instance.primary_node,
12822                                           self.instance, disk, dev_path, idx)
12823     return (disk, [
12824       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12825       ])
12827   @staticmethod
12828   def _ModifyDisk(idx, disk, params, _):
12829     """Modifies a disk.
12831     """
12832     disk.mode = params[constants.IDISK_MODE]
12834     return [
12835       ("disk.mode/%d" % idx, disk.mode),
12836       ]
12838 def _RemoveDisk(self, idx, root, _):
12842 #TODO: log warning in case hotplug is not possible
12844 if root.pci and not self.op.hotplug:
12845       raise errors.OpPrereqError("Cannot remove a disk that has"
12846                                  " been hotplugged"
12847                                  " without removing it with hotplug",
12848 errors.ECODE_INVAL)
12849 if self.op.hotplug and root.pci:
12850 self.rpc.call_hot_del_disk(self.instance.primary_node,
12851 self.instance, root, idx)
12852 _ShutdownInstanceDisks(self, self.instance, [root])
12853 self.cfg.UpdatePCIInfo(self.instance.name, root.pci)
12855 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12856 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12857 self.cfg.SetDiskID(disk, node)
12858 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12860 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12861 " continuing anyway", idx, node, msg)
12863 # if this is a DRBD disk, return its port to the pool
12864 if root.dev_type in constants.LDS_DRBD:
12865 self.cfg.AddTcpUdpPort(root.logical_id[2])
12867 def _CreateNewNic(self, idx, params, private):
12868 """Creates data structure for a new network interface.
12871 mac = params[constants.INIC_MAC]
12872 ip = params.get(constants.INIC_IP, None)
12873 #TODO: not private.filled?? can a nic be saved without nicparams??
12874 nicparams = private.filled
12876 nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12878 #TODO: log warning in case hotplug is not possible
12881 if self.op.hotplug:
12882 nic_idx, pci = _GetPCIInfo(self, 'nics')
12885 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
12886                                          self.instance, nic, idx)
12887     return (nic, [
12888       ("nic.%d" % idx,
12889        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12890        (mac, ip, private.filled[constants.NIC_MODE],
12891        private.filled[constants.NIC_LINK])),
12892       ])
12895 def _ApplyNicMods(self, idx, nic, params, private):
12896 """Modifies a network interface.
12900     changes = []
12901     for key in [constants.INIC_MAC, constants.INIC_IP]:
12902       if key in params:
12903         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12904         setattr(nic, key, params[key])
12907 nic.nicparams = private.params
12909 for (key, val) in params.items():
12910 changes.append(("nic.%s/%d" % (key, idx), val))
12912 #TODO: log warning in case hotplug is not possible
12914 if self.op.hotplug and nic.pci:
12915 self.rpc.call_hot_del_nic(self.instance.primary_node,
12916 self.instance, nic, idx)
12917 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
12918                                          self.instance, nic, idx)
12919     return changes
12921 def _RemoveNic(self, idx, nic, private):
12922 if nic.pci and not self.op.hotplug:
12923 raise errors.OpPrereqError("Cannot remove a nic that has been hotplugged"
12924 " without removing it with hotplug",
12925 errors.ECODE_INVAL)
12926 #TODO: log warning in case hotplug is not possible
12928 if self.op.hotplug and nic.pci:
12929 self.rpc.call_hot_del_nic(self.instance.primary_node,
12930 self.instance, nic, idx)
12931 self.cfg.UpdatePCIInfo(self.instance.name, nic.pci)
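  # The helpers above share one hotplug pattern: when self.op.hotplug is set
  # and the device has a PCI slot recorded, the change is also applied to the
  # running instance via the hot_add_*/hot_del_* RPCs; otherwise it only takes
  # effect at the next restart, as the Exec() docstring below notes.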
12934 def Exec(self, feedback_fn):
12935 """Modifies an instance.
12937 All parameters take effect only at the next restart of the instance.
12940 # Process here the warnings from CheckPrereq, as we don't have a
12941 # feedback_fn there.
12942 # TODO: Replace with self.LogWarning
12943 for warn in self.warn:
12944 feedback_fn("WARNING: %s" % warn)
12946 assert ((self.op.disk_template is None) ^
12947 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12948       "Not owning any node resource locks"
12950     result = []
12951     instance = self.instance
12954     if self.op.runtime_mem:
12955       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12956                                                      instance,
12957                                                      self.op.runtime_mem)
12958 rpcres.Raise("Cannot modify instance runtime memory")
12959 result.append(("runtime_memory", self.op.runtime_mem))
12961 # Apply disk changes
12962 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12963 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12964 _UpdateIvNames(0, instance.disks)
12966 if self.op.disk_template:
12968 check_nodes = set(instance.all_nodes)
12969 if self.op.remote_node:
12970 check_nodes.add(self.op.remote_node)
12971 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12972 owned = self.owned_locks(level)
12973 assert not (check_nodes - owned), \
12974 ("Not owning the correct locks, owning %r, expected at least %r" %
12975 (owned, check_nodes))
12977       r_shut = _ShutdownInstanceDisks(self, instance)
12978       if not r_shut:
12979         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12980 " proceed with disk template conversion")
12981       mode = (instance.disk_template, self.op.disk_template)
12982       try:
12983         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12984       except:
12985         self.cfg.ReleaseDRBDMinors(instance.name)
12986         raise
12987       result.append(("disk_template", self.op.disk_template))
12989 assert instance.disk_template == self.op.disk_template, \
12990 ("Expected disk template '%s', found '%s'" %
12991 (self.op.disk_template, instance.disk_template))
12993 # Release node and resource locks if there are any (they might already have
12994 # been released during disk conversion)
12995 _ReleaseLocks(self, locking.LEVEL_NODE)
12996 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12998 # Apply NIC changes
12999 if self._new_nics is not None:
13000 instance.nics = self._new_nics
13001 result.extend(self._nic_chgdesc)
13004 if self.op.hvparams:
13005 instance.hvparams = self.hv_inst
13006 for key, val in self.op.hvparams.iteritems():
13007 result.append(("hv/%s" % key, val))
13010 if self.op.beparams:
13011 instance.beparams = self.be_inst
13012 for key, val in self.op.beparams.iteritems():
13013 result.append(("be/%s" % key, val))
13016 if self.op.os_name:
13017 instance.os = self.op.os_name
13020 if self.op.osparams:
13021 instance.osparams = self.os_inst
13022 for key, val in self.op.osparams.iteritems():
13023 result.append(("os/%s" % key, val))
13025     if self.op.offline is None:
13026       # Ignore
13027       pass
13028     elif self.op.offline:
13029       # Mark instance as offline
13030       self.cfg.MarkInstanceOffline(instance.name)
13031       result.append(("admin_state", constants.ADMINST_OFFLINE))
13032     else:
13033       # Mark instance as online, but stopped
13034 self.cfg.MarkInstanceDown(instance.name)
13035 result.append(("admin_state", constants.ADMINST_DOWN))
13037 self.cfg.Update(instance, feedback_fn)
13039 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13040 self.owned_locks(locking.LEVEL_NODE)), \
13041       "All node locks should have been released by now"
13043     return result
13045 _DISK_CONVERSIONS = {
13046 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13047     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13048     }
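  # Maps (current_template, requested_template) to the conversion method
  # invoked from Exec(); only the two plain<->drbd conversions above are
  # supported, other combinations are expected to be rejected before Exec runs.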
13051 class LUInstanceChangeGroup(LogicalUnit):
13052 HPATH = "instance-change-group"
13053 HTYPE = constants.HTYPE_INSTANCE
13056 def ExpandNames(self):
13057 self.share_locks = _ShareAll()
13058 self.needed_locks = {
13059 locking.LEVEL_NODEGROUP: [],
13060       locking.LEVEL_NODE: [],
13061       }
13063     self._ExpandAndLockInstance()
13065 if self.op.target_groups:
13066 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13067                                   self.op.target_groups)
13068     else:
13069       self.req_target_uuids = None
13071 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13073 def DeclareLocks(self, level):
13074 if level == locking.LEVEL_NODEGROUP:
13075 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13077 if self.req_target_uuids:
13078 lock_groups = set(self.req_target_uuids)
13080 # Lock all groups used by instance optimistically; this requires going
13081 # via the node before it's locked, requiring verification later on
13082 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13083         lock_groups.update(instance_groups)
13084       else:
13085         # No target groups, need to lock all of them
13086 lock_groups = locking.ALL_SET
13088 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13090 elif level == locking.LEVEL_NODE:
13091 if self.req_target_uuids:
13092 # Lock all nodes used by instances
13093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13094 self._LockInstancesNodes()
13096 # Lock all nodes in all potential target groups
13097 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13098 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13099 member_nodes = [node_name
13100 for group in lock_groups
13101 for node_name in self.cfg.GetNodeGroup(group).members]
13102         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13103       else:
13104         # Lock all nodes as all groups are potential targets
13105 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13107 def CheckPrereq(self):
13108 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13109 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13110 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13112 assert (self.req_target_uuids is None or
13113 owned_groups.issuperset(self.req_target_uuids))
13114 assert owned_instances == set([self.op.instance_name])
13116 # Get instance information
13117 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13119 # Check if node groups for locked instance are still correct
13120 assert owned_nodes.issuperset(self.instance.all_nodes), \
13121 ("Instance %s's nodes changed while we kept the lock" %
13122 self.op.instance_name)
13124     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13125                                            owned_groups)
13127     if self.req_target_uuids:
13128 # User requested specific target groups
13129       self.target_uuids = frozenset(self.req_target_uuids)
13130     else:
13131       # All groups except those used by the instance are potential targets
13132 self.target_uuids = owned_groups - inst_groups
13134 conflicting_groups = self.target_uuids & inst_groups
13135 if conflicting_groups:
13136 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13137 " used by the instance '%s'" %
13138 (utils.CommaJoin(conflicting_groups),
13139 self.op.instance_name),
13140 errors.ECODE_INVAL)
13142 if not self.target_uuids:
13143 raise errors.OpPrereqError("There are no possible target groups",
13144 errors.ECODE_INVAL)
13146 def BuildHooksEnv(self):
13147 """Build hooks env.
13150 assert self.target_uuids
13152     env = {
13153       "TARGET_GROUPS": " ".join(self.target_uuids),
13154       }
13156     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13158     return env
13160 def BuildHooksNodes(self):
13161 """Build hooks nodes.
13164 mn = self.cfg.GetMasterNode()
13165 return ([mn], [mn])
13167 def Exec(self, feedback_fn):
13168 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13170 assert instances == [self.op.instance_name], "Instance not locked"
13172 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13173 instances=instances, target_groups=list(self.target_uuids))
13175 ial.Run(self.op.iallocator)
13177 if not ial.success:
13178 raise errors.OpPrereqError("Can't compute solution for changing group of"
13179 " instance '%s' using iallocator '%s': %s" %
13180                                  (self.op.instance_name, self.op.iallocator,
13181                                   ial.info),
13182                                  errors.ECODE_NORES)
13184 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13186 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13187 " instance '%s'", len(jobs), self.op.instance_name)
13189 return ResultWithJobs(jobs)
13192 class LUBackupQuery(NoHooksLU):
13193 """Query the exports list
13198 def CheckArguments(self):
13199 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13200 ["node", "export"], self.op.use_locking)
13202 def ExpandNames(self):
13203 self.expq.ExpandNames(self)
13205 def DeclareLocks(self, level):
13206 self.expq.DeclareLocks(self, level)
13208   def Exec(self, feedback_fn):
13209     result = {}
13211     for (node, expname) in self.expq.OldStyleQuery(self):
13212       if expname is None:
13213         result[node] = False
13214       else:
13215         result.setdefault(node, []).append(expname)
13217     return result
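  # The old-style result maps each node name either to False (the node could
  # not be queried) or to the list of export names found on that node.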
13220 class _ExportQuery(_QueryBase):
13221 FIELDS = query.EXPORT_FIELDS
13223 #: The node name is not a unique key for this query
13224 SORT_FIELD = "node"
13226 def ExpandNames(self, lu):
13227 lu.needed_locks = {}
13229 # The following variables interact with _QueryBase._GetNames
13230     if self.names:
13231       self.wanted = _GetWantedNodes(lu, self.names)
13232     else:
13233       self.wanted = locking.ALL_SET
13235 self.do_locking = self.use_locking
13237 if self.do_locking:
13238 lu.share_locks = _ShareAll()
13239 lu.needed_locks = {
13240         locking.LEVEL_NODE: self.wanted,
13241         }
13243   def DeclareLocks(self, lu, level):
13244     pass
13246 def _GetQueryData(self, lu):
13247 """Computes the list of nodes and their attributes.
13250 # Locking is not used
13252 assert not (compat.any(lu.glm.is_owned(level)
13253 for level in locking.LEVELS
13254 if level != locking.LEVEL_CLUSTER) or
13255 self.do_locking or self.use_locking)
13257     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13259     result = []
13261     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13262       if nres.fail_msg:
13263         result.append((node, None))
13264       else:
13265         result.extend((node, expname) for expname in nres.payload)
13267     return result
13270 class LUBackupPrepare(NoHooksLU):
13271 """Prepares an instance for an export and returns useful information.
13276 def ExpandNames(self):
13277 self._ExpandAndLockInstance()
13279 def CheckPrereq(self):
13280 """Check prerequisites.
13283 instance_name = self.op.instance_name
13285 self.instance = self.cfg.GetInstanceInfo(instance_name)
13286 assert self.instance is not None, \
13287 "Cannot retrieve locked instance %s" % self.op.instance_name
13288 _CheckNodeOnline(self, self.instance.primary_node)
13290 self._cds = _GetClusterDomainSecret()
13292 def Exec(self, feedback_fn):
13293 """Prepares an instance for an export.
13296 instance = self.instance
13298 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13299 salt = utils.GenerateSecret(8)
13301 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13302 result = self.rpc.call_x509_cert_create(instance.primary_node,
13303 constants.RIE_CERT_VALIDITY)
13304 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13306 (name, cert_pem) = result.payload
13308       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13309                                              cert_pem)
13311       return {
13312         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13313         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13314                           salt),
13315         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13316         }
13318     return None
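    # The Sha1Hmac ties the generated key name to this cluster's domain
    # secret; LUBackupExport.CheckPrereq verifies that HMAC before trusting
    # the x509_key_name handed back in the export opcode.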
13321 class LUBackupExport(LogicalUnit):
13322 """Export an instance to an image in the cluster.
13325 HPATH = "instance-export"
13326 HTYPE = constants.HTYPE_INSTANCE
13329 def CheckArguments(self):
13330 """Check the arguments.
13333 self.x509_key_name = self.op.x509_key_name
13334 self.dest_x509_ca_pem = self.op.destination_x509_ca
13336 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13337 if not self.x509_key_name:
13338 raise errors.OpPrereqError("Missing X509 key name for encryption",
13339 errors.ECODE_INVAL)
13341 if not self.dest_x509_ca_pem:
13342 raise errors.OpPrereqError("Missing destination X509 CA",
13343 errors.ECODE_INVAL)
13345 def ExpandNames(self):
13346 self._ExpandAndLockInstance()
13348 # Lock all nodes for local exports
13349 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13350 # FIXME: lock only instance primary and destination node
13352       # Sad but true, for now we have to lock all nodes, as we don't know where
13353 # the previous export might be, and in this LU we search for it and
13354 # remove it from its current node. In the future we could fix this by:
13355 # - making a tasklet to search (share-lock all), then create the
13356 # new one, then one to remove, after
13357 # - removing the removal operation altogether
13358 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13360 def DeclareLocks(self, level):
13361 """Last minute lock declaration."""
13362 # All nodes are locked anyway, so nothing to do here.
13364 def BuildHooksEnv(self):
13365 """Build hooks env.
13367 This will run on the master, primary node and target node.
13370     env = {
13371       "EXPORT_MODE": self.op.mode,
13372 "EXPORT_NODE": self.op.target_node,
13373 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13374 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13375 # TODO: Generic function for boolean env variables
13376       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13377       }
13379     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13381     return env
13383 def BuildHooksNodes(self):
13384 """Build hooks nodes.
13387 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13389 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13390       nl.append(self.op.target_node)
13392     return (nl, nl)
13394 def CheckPrereq(self):
13395 """Check prerequisites.
13397 This checks that the instance and node names are valid.
13400 instance_name = self.op.instance_name
13402 self.instance = self.cfg.GetInstanceInfo(instance_name)
13403 assert self.instance is not None, \
13404 "Cannot retrieve locked instance %s" % self.op.instance_name
13405 _CheckNodeOnline(self, self.instance.primary_node)
13407 if (self.op.remove_instance and
13408 self.instance.admin_state == constants.ADMINST_UP and
13409 not self.op.shutdown):
13410       raise errors.OpPrereqError("Can not remove instance without shutting it"
13411                                  " down before")
13413     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13414 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13415 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13416 assert self.dst_node is not None
13418 _CheckNodeOnline(self, self.dst_node.name)
13419 _CheckNodeNotDrained(self, self.dst_node.name)
13422 self.dest_disk_info = None
13423 self.dest_x509_ca = None
13425 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13426 self.dst_node = None
13428 if len(self.op.target_node) != len(self.instance.disks):
13429 raise errors.OpPrereqError(("Received destination information for %s"
13430 " disks, but instance %s has %s disks") %
13431 (len(self.op.target_node), instance_name,
13432 len(self.instance.disks)),
13433 errors.ECODE_INVAL)
13435 cds = _GetClusterDomainSecret()
13437       # Check X509 key name
13438       try:
13439         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13440 except (TypeError, ValueError), err:
13441 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13443 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13444 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13445 errors.ECODE_INVAL)
13447       # Load and verify CA
13448       try:
13449         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13450 except OpenSSL.crypto.Error, err:
13451 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13452 (err, ), errors.ECODE_INVAL)
13454 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13455 if errcode is not None:
13456 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13457 (msg, ), errors.ECODE_INVAL)
13459 self.dest_x509_ca = cert
13461       # Verify target information
13462       disk_info = []
13463       for idx, disk_data in enumerate(self.op.target_node):
13464         try:
13465 (host, port, magic) = \
13466 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13467 except errors.GenericError, err:
13468 raise errors.OpPrereqError("Target info for disk %s: %s" %
13469 (idx, err), errors.ECODE_INVAL)
13471 disk_info.append((host, port, magic))
13473 assert len(disk_info) == len(self.op.target_node)
13474       self.dest_disk_info = disk_info
13476     else:
13477       raise errors.ProgrammerError("Unhandled export mode %r" %
13478                                    self.op.mode)
13480 # instance disk type verification
13481 # TODO: Implement export support for file-based disks
13482 for disk in self.instance.disks:
13483 if disk.dev_type == constants.LD_FILE:
13484 raise errors.OpPrereqError("Export not supported for instances with"
13485 " file-based disks", errors.ECODE_INVAL)
13487 def _CleanupExports(self, feedback_fn):
13488 """Removes exports of current instance from all other nodes.
13490 If an instance in a cluster with nodes A..D was exported to node C, its
13491 exports will be removed from the nodes A, B and D.
13494 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13496 nodelist = self.cfg.GetNodeList()
13497 nodelist.remove(self.dst_node.name)
13499 # on one-node clusters nodelist will be empty after the removal
13500 # if we proceed the backup would be removed because OpBackupQuery
13501 # substitutes an empty list with the full cluster node list.
13502     iname = self.instance.name
13503     if nodelist:
13504       feedback_fn("Removing old exports for instance %s" % iname)
13505 exportlist = self.rpc.call_export_list(nodelist)
13506 for node in exportlist:
13507       if exportlist[node].fail_msg:
13508         continue
13509       if iname in exportlist[node].payload:
13510         msg = self.rpc.call_export_remove(node, iname).fail_msg
13511         if msg:
13512           self.LogWarning("Could not remove older export for instance %s"
13513 " on node %s: %s", iname, node, msg)
13515 def Exec(self, feedback_fn):
13516 """Export an instance to an image in the cluster.
13519 assert self.op.mode in constants.EXPORT_MODES
13521 instance = self.instance
13522 src_node = instance.primary_node
13524 if self.op.shutdown:
13525 # shutdown the instance, but not the disks
13526 feedback_fn("Shutting down instance %s" % instance.name)
13527 result = self.rpc.call_instance_shutdown(src_node, instance,
13528 self.op.shutdown_timeout)
13529 # TODO: Maybe ignore failures if ignore_remove_failures is set
13530 result.Raise("Could not shutdown instance %s on"
13531 " node %s" % (instance.name, src_node))
13533 # set the disks ID correctly since call_instance_start needs the
13534 # correct drbd minor to create the symlinks
13535 for disk in instance.disks:
13536 self.cfg.SetDiskID(disk, src_node)
13538     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13540     if activate_disks:
13541       # Activate the instance disks if we're exporting a stopped instance
13542       feedback_fn("Activating disks for %s" % instance.name)
13543       _StartInstanceDisks(self, instance, None)
13545     try:
13546       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13547                                                      instance)
13549       helper.CreateSnapshots()
13550       try:
13551 if (self.op.shutdown and
13552 instance.admin_state == constants.ADMINST_UP and
13553 not self.op.remove_instance):
13554 assert not activate_disks
13555 feedback_fn("Starting instance %s" % instance.name)
13556 result = self.rpc.call_instance_start(src_node,
13557 (instance, None, None), False)
13558           msg = result.fail_msg
13559           if msg:
13560             feedback_fn("Failed to start instance: %s" % msg)
13561 _ShutdownInstanceDisks(self, instance)
13562 raise errors.OpExecError("Could not start instance: %s" % msg)
13564 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13565 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13566 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13567 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13568 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13570         (key_name, _, _) = self.x509_key_name
13572         dest_ca_pem = \
13573           OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13574                                           self.dest_x509_ca)
13576 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13577                                                    key_name, dest_ca_pem,
13578                                                    timeouts)
13579       finally:
13580         helper.Cleanup()
13582       # Check for backwards compatibility
13583 assert len(dresults) == len(instance.disks)
13584 assert compat.all(isinstance(i, bool) for i in dresults), \
13585 "Not all results are boolean: %r" % dresults
13588     if activate_disks:
13589       feedback_fn("Deactivating disks for %s" % instance.name)
13590 _ShutdownInstanceDisks(self, instance)
13592     if not (compat.all(dresults) and fin_resu):
13593       failures = []
13594       if not fin_resu:
13595         failures.append("export finalization")
13596       if not compat.all(dresults):
13597         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13598                                if not dsk)
13599         failures.append("disk export: disk(s) %s" % fdsk)
13601 raise errors.OpExecError("Export failed, errors in %s" %
13602 utils.CommaJoin(failures))
13604 # At this point, the export was successful, we can cleanup/finish
13606 # Remove instance if requested
13607 if self.op.remove_instance:
13608 feedback_fn("Removing instance %s" % instance.name)
13609 _RemoveInstance(self, feedback_fn, instance,
13610 self.op.ignore_remove_failures)
13612 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13613 self._CleanupExports(feedback_fn)
13615 return fin_resu, dresults
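    # fin_resu says whether export finalization succeeded and dresults holds
    # one boolean per instance disk; callers (e.g. the gnt-backup client) use
    # them to report partially failed exports.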
13618 class LUBackupRemove(NoHooksLU):
13619 """Remove exports related to the named instance.
13624 def ExpandNames(self):
13625 self.needed_locks = {}
13626 # We need all nodes to be locked in order for RemoveExport to work, but we
13627 # don't need to lock the instance itself, as nothing will happen to it (and
13628 # we can remove exports also for a removed instance)
13629 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13631 def Exec(self, feedback_fn):
13632 """Remove any export.
13635 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13636 # If the instance was not found we'll try with the name that was passed in.
13637 # This will only work if it was an FQDN, though.
13638     fqdn_warn = False
13639     if not instance_name:
13640       fqdn_warn = True
13641       instance_name = self.op.instance_name
13643 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13644 exportlist = self.rpc.call_export_list(locked_nodes)
13645     found = False
13646     for node in exportlist:
13647       msg = exportlist[node].fail_msg
13648       if msg:
13649         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13650         continue
13651       if instance_name in exportlist[node].payload:
13652         found = True
13653         result = self.rpc.call_export_remove(node, instance_name)
13654         msg = result.fail_msg
13655         if msg:
13656           logging.error("Could not remove export for instance %s"
13657                         " on node %s: %s", instance_name, node, msg)
13659 if fqdn_warn and not found:
13660 feedback_fn("Export not found. If trying to remove an export belonging"
13661                   " to a deleted instance please use its Fully Qualified"
13662                   " Name.")
13665 class LUGroupAdd(LogicalUnit):
13666 """Logical unit for creating node groups.
13669 HPATH = "group-add"
13670 HTYPE = constants.HTYPE_GROUP
13673 def ExpandNames(self):
13674 # We need the new group's UUID here so that we can create and acquire the
13675 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13676 # that it should not check whether the UUID exists in the configuration.
13677 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13678 self.needed_locks = {}
13679 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13681 def CheckPrereq(self):
13682 """Check prerequisites.
13684 This checks that the given group name is not an existing node group
13688     try:
13689       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13690     except errors.OpPrereqError:
13691       pass
13692     else:
13693       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13694 " node group (UUID: %s)" %
13695 (self.op.group_name, existing_uuid),
13696 errors.ECODE_EXISTS)
13698 if self.op.ndparams:
13699 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13701 if self.op.hv_state:
13702       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13703     else:
13704       self.new_hv_state = None
13706 if self.op.disk_state:
13707       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13708     else:
13709       self.new_disk_state = None
13711 if self.op.diskparams:
13712 for templ in constants.DISK_TEMPLATES:
13713 if templ in self.op.diskparams:
13714 utils.ForceDictType(self.op.diskparams[templ],
13715 constants.DISK_DT_TYPES)
13716       self.new_diskparams = self.op.diskparams
13717       try:
13718         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13719       except errors.OpPrereqError, err:
13720         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13721                                    errors.ECODE_INVAL)
13722     else:
13723       self.new_diskparams = {}
13725 if self.op.ipolicy:
13726 cluster = self.cfg.GetClusterInfo()
13727       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13728       try:
13729         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13730 except errors.ConfigurationError, err:
13731 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13732 errors.ECODE_INVAL)
13734 def BuildHooksEnv(self):
13735 """Build hooks env.
13738     return {
13739       "GROUP_NAME": self.op.group_name,
13740       }
13742 def BuildHooksNodes(self):
13743 """Build hooks nodes.
13746 mn = self.cfg.GetMasterNode()
13747 return ([mn], [mn])
13749 def Exec(self, feedback_fn):
13750 """Add the node group to the cluster.
13753 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13754 uuid=self.group_uuid,
13755 alloc_policy=self.op.alloc_policy,
13756 ndparams=self.op.ndparams,
13757 diskparams=self.new_diskparams,
13758 ipolicy=self.op.ipolicy,
13759 hv_state_static=self.new_hv_state,
13760 disk_state_static=self.new_disk_state)
13762 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13763 del self.remove_locks[locking.LEVEL_NODEGROUP]
13766 class LUGroupAssignNodes(NoHooksLU):
13767 """Logical unit for assigning nodes to groups.
13772 def ExpandNames(self):
13773 # These raise errors.OpPrereqError on their own:
13774 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13775 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13777 # We want to lock all the affected nodes and groups. We have readily
13778 # available the list of nodes, and the *destination* group. To gather the
13779 # list of "source" groups, we need to fetch node information later on.
13780 self.needed_locks = {
13781 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13782       locking.LEVEL_NODE: self.op.nodes,
13783       }
13785 def DeclareLocks(self, level):
13786 if level == locking.LEVEL_NODEGROUP:
13787 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13789 # Try to get all affected nodes' groups without having the group or node
13790 # lock yet. Needs verification later in the code flow.
13791 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13793 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13795 def CheckPrereq(self):
13796 """Check prerequisites.
13799 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13800 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13801 frozenset(self.op.nodes))
13803 expected_locks = (set([self.group_uuid]) |
13804 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13805 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13806 if actual_locks != expected_locks:
13807 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13808 " current groups are '%s', used to be '%s'" %
13809 (utils.CommaJoin(expected_locks),
13810 utils.CommaJoin(actual_locks)))
13812 self.node_data = self.cfg.GetAllNodesInfo()
13813 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13814 instance_data = self.cfg.GetAllInstancesInfo()
13816 if self.group is None:
13817 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13818 (self.op.group_name, self.group_uuid))
13820 (new_splits, previous_splits) = \
13821 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13822 for node in self.op.nodes],
13823 self.node_data, instance_data)
13825     if new_splits:
13826       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13828       if not self.op.force:
13829         raise errors.OpExecError("The following instances get split by this"
13830                                  " change and --force was not given: %s" %
13831                                  fmt_new_splits)
13832       else:
13833         self.LogWarning("This operation will split the following instances: %s",
13834                         fmt_new_splits)
13836 if previous_splits:
13837 self.LogWarning("In addition, these already-split instances continue"
13838 " to be split across groups: %s",
13839 utils.CommaJoin(utils.NiceSort(previous_splits)))
13841 def Exec(self, feedback_fn):
13842 """Assign nodes to a new group.
13845 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13847 self.cfg.AssignGroupNodes(mods)
13849   @staticmethod
13850   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13851 """Check for split instances after a node assignment.
13853 This method considers a series of node assignments as an atomic operation,
13854     and returns information about split instances after applying the set of
13855     changes.
13857 In particular, it returns information about newly split instances, and
13858 instances that were already split, and remain so after the change.
13860     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13861     considered.
13863 @type changes: list of (node_name, new_group_uuid) pairs.
13864 @param changes: list of node assignments to consider.
13865 @param node_data: a dict with data for all nodes
13866 @param instance_data: a dict with all instances to consider
13867 @rtype: a two-tuple
13868 @return: a list of instances that were previously okay and result split as a
13869 consequence of this change, and a list of instances that were previously
13870 split and this change does not fix.
13873 changed_nodes = dict((node, group) for node, group in changes
13874 if node_data[node].group != group)
13876 all_split_instances = set()
13877 previously_split_instances = set()
13879 def InstanceNodes(instance):
13880 return [instance.primary_node] + list(instance.secondary_nodes)
13882 for inst in instance_data.values():
13883       if inst.disk_template not in constants.DTS_INT_MIRROR:
13884         continue
13886       instance_nodes = InstanceNodes(inst)
13888 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13889 previously_split_instances.add(inst.name)
13891 if len(set(changed_nodes.get(node, node_data[node].group)
13892 for node in instance_nodes)) > 1:
13893 all_split_instances.add(inst.name)
13895 return (list(all_split_instances - previously_split_instances),
13896 list(previously_split_instances & all_split_instances))
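  # Worked example (hypothetical names): a DRBD instance with its primary on
  # node1 (group G1) and its secondary on node2 (also G1) becomes "newly
  # split" if this assignment moves node2 into G2; had node2 already been in
  # G2, the instance would instead be reported as previously split.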
13899 class _GroupQuery(_QueryBase):
13900 FIELDS = query.GROUP_FIELDS
13902 def ExpandNames(self, lu):
13903 lu.needed_locks = {}
13905 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13906 self._cluster = lu.cfg.GetClusterInfo()
13907 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13909     if not self.names:
13910       self.wanted = [name_to_uuid[name]
13911                      for name in utils.NiceSort(name_to_uuid.keys())]
13912     else:
13913       # Accept names to be either names or UUIDs.
13914       missing = []
13915       self.wanted = []
13916       all_uuid = frozenset(self._all_groups.keys())
13918       for name in self.names:
13919         if name in all_uuid:
13920           self.wanted.append(name)
13921         elif name in name_to_uuid:
13922           self.wanted.append(name_to_uuid[name])
13923         else:
13924           missing.append(name)
13926       if missing:
13927         raise errors.OpPrereqError("Some groups do not exist: %s" %
13928 utils.CommaJoin(missing),
13929 errors.ECODE_NOENT)
13931   def DeclareLocks(self, lu, level):
13932     pass
13934 def _GetQueryData(self, lu):
13935 """Computes the list of node groups and their attributes.
13938 do_nodes = query.GQ_NODE in self.requested_data
13939 do_instances = query.GQ_INST in self.requested_data
13941 group_to_nodes = None
13942 group_to_instances = None
13944 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13945 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13946 # latter GetAllInstancesInfo() is not enough, for we have to go through
13947 # instance->node. Hence, we will need to process nodes even if we only need
13948 # instance information.
13949 if do_nodes or do_instances:
13950 all_nodes = lu.cfg.GetAllNodesInfo()
13951       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13952       node_to_group = {}
13954       for node in all_nodes.values():
13955 if node.group in group_to_nodes:
13956 group_to_nodes[node.group].append(node.name)
13957           node_to_group[node.name] = node.group
13959       if do_instances:
13960         all_instances = lu.cfg.GetAllInstancesInfo()
13961 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13963 for instance in all_instances.values():
13964 node = instance.primary_node
13965 if node in node_to_group:
13966 group_to_instances[node_to_group[node]].append(instance.name)
13968       if not do_nodes:
13969         # Do not pass on node information if it was not requested.
13970         group_to_nodes = None
13972 return query.GroupQueryData(self._cluster,
13973 [self._all_groups[uuid]
13974 for uuid in self.wanted],
13975 group_to_nodes, group_to_instances,
13976 query.GQ_DISKPARAMS in self.requested_data)
13979 class LUGroupQuery(NoHooksLU):
13980 """Logical unit for querying node groups.
13985 def CheckArguments(self):
13986 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13987 self.op.output_fields, False)
13989 def ExpandNames(self):
13990 self.gq.ExpandNames(self)
13992 def DeclareLocks(self, level):
13993 self.gq.DeclareLocks(self, level)
13995 def Exec(self, feedback_fn):
13996 return self.gq.OldStyleQuery(self)
13999 class LUGroupSetParams(LogicalUnit):
14000 """Modifies the parameters of a node group.
14003 HPATH = "group-modify"
14004 HTYPE = constants.HTYPE_GROUP
14007 def CheckArguments(self):
14008     all_changes = [
14009       self.op.ndparams,
14010       self.op.diskparams,
14011       self.op.alloc_policy,
14012       self.op.hv_state,
14013       self.op.disk_state,
14014       self.op.ipolicy,
14015       ]
14017     if all_changes.count(None) == len(all_changes):
14018 raise errors.OpPrereqError("Please pass at least one modification",
14019 errors.ECODE_INVAL)
14021 def ExpandNames(self):
14022 # This raises errors.OpPrereqError on its own:
14023 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14025 self.needed_locks = {
14026 locking.LEVEL_INSTANCE: [],
14027       locking.LEVEL_NODEGROUP: [self.group_uuid],
14028       }
14030     self.share_locks[locking.LEVEL_INSTANCE] = 1
14032 def DeclareLocks(self, level):
14033 if level == locking.LEVEL_INSTANCE:
14034 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14036       # Lock instances optimistically, needs verification once group lock has
14037       # been acquired
14038       self.needed_locks[locking.LEVEL_INSTANCE] = \
14039 self.cfg.GetNodeGroupInstances(self.group_uuid)
14041   @staticmethod
14042   def _UpdateAndVerifyDiskParams(old, new):
14043 """Updates and verifies disk parameters.
14046 new_params = _GetUpdatedParams(old, new)
14047     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14048     return new_params
14050 def CheckPrereq(self):
14051 """Check prerequisites.
14054 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14056 # Check if locked instances are still correct
14057 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
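    # The instance locks were acquired optimistically in DeclareLocks, based
    # on configuration data read before the group lock was held; the check
    # above verifies the group's instance list has not changed in the meantime.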
14059 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14060 cluster = self.cfg.GetClusterInfo()
14062 if self.group is None:
14063 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14064 (self.op.group_name, self.group_uuid))
14066 if self.op.ndparams:
14067 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14068 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14069 self.new_ndparams = new_ndparams
14071 if self.op.diskparams:
14072 diskparams = self.group.diskparams
14073 uavdp = self._UpdateAndVerifyDiskParams
14074 # For each disktemplate subdict update and verify the values
14075 new_diskparams = dict((dt,
14076 uavdp(diskparams.get(dt, {}),
14077 self.op.diskparams[dt]))
14078 for dt in constants.DISK_TEMPLATES
14079 if dt in self.op.diskparams)
14080 # As we've all subdicts of diskparams ready, lets merge the actual
14081 # dict with all updated subdicts
14082       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14083       try:
14084         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14085       except errors.OpPrereqError, err:
14086         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14087 errors.ECODE_INVAL)
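    # Only the per-template subdicts named in self.op.diskparams are updated;
    # objects.FillDict then layers them over the group's existing diskparams,
    # so templates that were not mentioned keep their previous values.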
14089 if self.op.hv_state:
14090 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14091 self.group.hv_state_static)
14093 if self.op.disk_state:
14094 self.new_disk_state = \
14095 _MergeAndVerifyDiskState(self.op.disk_state,
14096 self.group.disk_state_static)
14098 if self.op.ipolicy:
14099       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14100                                             self.op.ipolicy,
14101                                             group_policy=True)
14103       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14104       inst_filter = lambda inst: inst.name in owned_instances
14105       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14106       violations = \
14107         _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14108                                                              self.group),
14109                                       new_ipolicy, instances)
14111       if violations:
14112         self.LogWarning("After the ipolicy change the following instances"
14113 " violate them: %s",
14114 utils.CommaJoin(violations))
14116 def BuildHooksEnv(self):
14117 """Build hooks env.
14120     return {
14121       "GROUP_NAME": self.op.group_name,
14122       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14123       }
14125 def BuildHooksNodes(self):
14126 """Build hooks nodes.
14129 mn = self.cfg.GetMasterNode()
14130 return ([mn], [mn])
14132 def Exec(self, feedback_fn):
14133 """Modifies the node group.
14136     result = []
14138     if self.op.ndparams:
14139 self.group.ndparams = self.new_ndparams
14140 result.append(("ndparams", str(self.group.ndparams)))
14142 if self.op.diskparams:
14143 self.group.diskparams = self.new_diskparams
14144 result.append(("diskparams", str(self.group.diskparams)))
14146 if self.op.alloc_policy:
14147 self.group.alloc_policy = self.op.alloc_policy
14149 if self.op.hv_state:
14150 self.group.hv_state_static = self.new_hv_state
14152 if self.op.disk_state:
14153 self.group.disk_state_static = self.new_disk_state
14155 if self.op.ipolicy:
14156 self.group.ipolicy = self.new_ipolicy
14158     self.cfg.Update(self.group, feedback_fn)
14160     return result
14162 class LUGroupRemove(LogicalUnit):
14163 HPATH = "group-remove"
14164 HTYPE = constants.HTYPE_GROUP
14167 def ExpandNames(self):
14168     # This will raise errors.OpPrereqError on its own:
14169 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14170 self.needed_locks = {
14171       locking.LEVEL_NODEGROUP: [self.group_uuid],
14172       }
14174 def CheckPrereq(self):
14175 """Check prerequisites.
14177 This checks that the given group name exists as a node group, that is
14178     empty (i.e., contains no nodes), and that is not the last group of the
14179     cluster.
14182 # Verify that the group is empty.
14183 group_nodes = [node.name
14184 for node in self.cfg.GetAllNodesInfo().values()
14185                    if node.group == self.group_uuid]
14187     if group_nodes:
14188       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14189                                  " nodes: %s" %
14190                                  (self.op.group_name,
14191 utils.CommaJoin(utils.NiceSort(group_nodes))),
14192 errors.ECODE_STATE)
14194 # Verify the cluster would not be left group-less.
14195 if len(self.cfg.GetNodeGroupList()) == 1:
14196 raise errors.OpPrereqError("Group '%s' is the only group,"
14197 " cannot be removed" %
14198 self.op.group_name,
14199 errors.ECODE_STATE)
14201 def BuildHooksEnv(self):
14202 """Build hooks env.
14205     return {
14206       "GROUP_NAME": self.op.group_name,
14207       }
14209 def BuildHooksNodes(self):
14210 """Build hooks nodes.
14213 mn = self.cfg.GetMasterNode()
14214 return ([mn], [mn])
14216 def Exec(self, feedback_fn):
14217 """Remove the node group.
14220     try:
14221       self.cfg.RemoveNodeGroup(self.group_uuid)
14222 except errors.ConfigurationError:
14223 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14224 (self.op.group_name, self.group_uuid))
14226 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14229 class LUGroupRename(LogicalUnit):
14230 HPATH = "group-rename"
14231 HTYPE = constants.HTYPE_GROUP
14234 def ExpandNames(self):
14235 # This raises errors.OpPrereqError on its own:
14236 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14238 self.needed_locks = {
14239       locking.LEVEL_NODEGROUP: [self.group_uuid],
14240       }
14242 def CheckPrereq(self):
14243 """Check prerequisites.
14245 Ensures requested new name is not yet used.
14248     try:
14249       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14250     except errors.OpPrereqError:
14251       pass
14252     else:
14253       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14254 " node group (UUID: %s)" %
14255 (self.op.new_name, new_name_uuid),
14256 errors.ECODE_EXISTS)
14258 def BuildHooksEnv(self):
14259 """Build hooks env.
14262     return {
14263       "OLD_NAME": self.op.group_name,
14264       "NEW_NAME": self.op.new_name,
14265       }
14267 def BuildHooksNodes(self):
14268 """Build hooks nodes.
14271 mn = self.cfg.GetMasterNode()
14273 all_nodes = self.cfg.GetAllNodesInfo()
14274     all_nodes.pop(mn, None)
14276     run_nodes = [mn]
14277     run_nodes.extend(node.name for node in all_nodes.values()
14278 if node.group == self.group_uuid)
14280 return (run_nodes, run_nodes)
14282 def Exec(self, feedback_fn):
14283 """Rename the node group.
14286     group = self.cfg.GetNodeGroup(self.group_uuid)
14288     if group is None:
14289       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14290 (self.op.group_name, self.group_uuid))
14292 group.name = self.op.new_name
14293 self.cfg.Update(group, feedback_fn)
14295 return self.op.new_name
14298 class LUGroupEvacuate(LogicalUnit):
14299 HPATH = "group-evacuate"
14300 HTYPE = constants.HTYPE_GROUP
14303 def ExpandNames(self):
14304 # This raises errors.OpPrereqError on its own:
14305 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14307 if self.op.target_groups:
14308 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14309                                   self.op.target_groups)
14310     else:
14311       self.req_target_uuids = []
14313 if self.group_uuid in self.req_target_uuids:
14314 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14315                                  " as a target group (targets are %s)" %
14316                                  (self.group_uuid,
14317                                   utils.CommaJoin(self.req_target_uuids)),
14318 errors.ECODE_INVAL)
14320 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14322 self.share_locks = _ShareAll()
14323 self.needed_locks = {
14324 locking.LEVEL_INSTANCE: [],
14325 locking.LEVEL_NODEGROUP: [],
14326       locking.LEVEL_NODE: [],
14327       }
14329 def DeclareLocks(self, level):
14330 if level == locking.LEVEL_INSTANCE:
14331 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14333 # Lock instances optimistically, needs verification once node and group
14334 # locks have been acquired
14335 self.needed_locks[locking.LEVEL_INSTANCE] = \
14336 self.cfg.GetNodeGroupInstances(self.group_uuid)
14338 elif level == locking.LEVEL_NODEGROUP:
14339 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14341 if self.req_target_uuids:
14342 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14344 # Lock all groups used by instances optimistically; this requires going
14345 # via the node before it's locked, requiring verification later on
14346 lock_groups.update(group_uuid
14347 for instance_name in
14348 self.owned_locks(locking.LEVEL_INSTANCE)
14350 self.cfg.GetInstanceNodeGroups(instance_name))
14352 # No target groups, need to lock all of them
14353 lock_groups = locking.ALL_SET
14355 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14357 elif level == locking.LEVEL_NODE:
14358 # This will only lock the nodes in the group to be evacuated which
14359 # contain actual instances
14360 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14361 self._LockInstancesNodes()
14363 # Lock all nodes in group to be evacuated and target groups
14364 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14365 assert self.group_uuid in owned_groups
14366 member_nodes = [node_name
14367 for group in owned_groups
14368 for node_name in self.cfg.GetNodeGroup(group).members]
14369 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14371 def CheckPrereq(self):
14372 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14373 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14374 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14376 assert owned_groups.issuperset(self.req_target_uuids)
14377 assert self.group_uuid in owned_groups
14379 # Check if locked instances are still correct
14380 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14382 # Get instance information
14383 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14385 # Check if node groups for locked instances are still correct
14386 _CheckInstancesNodeGroups(self.cfg, self.instances,
14387 owned_groups, owned_nodes, self.group_uuid)
14389 if self.req_target_uuids:
14390 # User requested specific target groups
14391 self.target_uuids = self.req_target_uuids
14393 # All groups except the one to be evacuated are potential targets
14394 self.target_uuids = [group_uuid for group_uuid in owned_groups
14395 if group_uuid != self.group_uuid]
14397 if not self.target_uuids:
14398 raise errors.OpPrereqError("There are no possible target groups",
14399 errors.ECODE_INVAL)
14401 def BuildHooksEnv(self):
14402 """Build hooks env.
14405     return {
14406       "GROUP_NAME": self.op.group_name,
14407       "TARGET_GROUPS": " ".join(self.target_uuids),
14408       }
14410 def BuildHooksNodes(self):
14411 """Build hooks nodes.
14414 mn = self.cfg.GetMasterNode()
14416 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14418 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14420 return (run_nodes, run_nodes)
14422 def Exec(self, feedback_fn):
14423 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14425 assert self.group_uuid not in self.target_uuids
14427 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14428 instances=instances, target_groups=self.target_uuids)
14430 ial.Run(self.op.iallocator)
14432 if not ial.success:
14433 raise errors.OpPrereqError("Can't compute group evacuation using"
14434 " iallocator '%s': %s" %
14435 (self.op.iallocator, ial.info),
14436 errors.ECODE_NORES)
14438 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14440 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14441 len(jobs), self.op.group_name)
14443 return ResultWithJobs(jobs)
14446 class TagsLU(NoHooksLU): # pylint: disable=W0223
14447 """Generic tags LU.
14449 This is an abstract class which is the parent of all the other tags LUs.
14452 def ExpandNames(self):
14453 self.group_uuid = None
14454 self.needed_locks = {}
14456 if self.op.kind == constants.TAG_NODE:
14457 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14458 lock_level = locking.LEVEL_NODE
14459 lock_name = self.op.name
14460 elif self.op.kind == constants.TAG_INSTANCE:
14461 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14462 lock_level = locking.LEVEL_INSTANCE
14463 lock_name = self.op.name
14464 elif self.op.kind == constants.TAG_NODEGROUP:
14465 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14466 lock_level = locking.LEVEL_NODEGROUP
14467       lock_name = self.group_uuid
14468     else:
14469       lock_level = None
14470       lock_name = None
14472     if lock_level and getattr(self.op, "use_locking", True):
14473 self.needed_locks[lock_level] = lock_name
14475 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14476 # not possible to acquire the BGL based on opcode parameters)
14478 def CheckPrereq(self):
14479 """Check prerequisites.
14482 if self.op.kind == constants.TAG_CLUSTER:
14483 self.target = self.cfg.GetClusterInfo()
14484 elif self.op.kind == constants.TAG_NODE:
14485 self.target = self.cfg.GetNodeInfo(self.op.name)
14486 elif self.op.kind == constants.TAG_INSTANCE:
14487 self.target = self.cfg.GetInstanceInfo(self.op.name)
14488 elif self.op.kind == constants.TAG_NODEGROUP:
14489       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14490     else:
14491       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14492 str(self.op.kind), errors.ECODE_INVAL)
14495 class LUTagsGet(TagsLU):
14496 """Returns the tags of a given object.
14501 def ExpandNames(self):
14502 TagsLU.ExpandNames(self)
14504 # Share locks as this is only a read operation
14505 self.share_locks = _ShareAll()
14507 def Exec(self, feedback_fn):
14508 """Returns the tag list.
14511 return list(self.target.GetTags())
14514 class LUTagsSearch(NoHooksLU):
14515 """Searches the tags for a given pattern.
14520 def ExpandNames(self):
14521 self.needed_locks = {}
14523 def CheckPrereq(self):
14524 """Check prerequisites.
14526 This checks the pattern passed for validity by compiling it.
14529     try:
14530       self.re = re.compile(self.op.pattern)
14531 except re.error, err:
14532 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14533 (self.op.pattern, err), errors.ECODE_INVAL)
14535 def Exec(self, feedback_fn):
14536 """Returns the tag list.
14539     cfg = self.cfg
14540     tgts = [("/cluster", cfg.GetClusterInfo())]
14541 ilist = cfg.GetAllInstancesInfo().values()
14542 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14543 nlist = cfg.GetAllNodesInfo().values()
14544 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14545 tgts.extend(("/nodegroup/%s" % n.name, n)
14546                 for n in cfg.GetAllNodeGroupsInfo().values())
14547     results = []
14548     for path, target in tgts:
14549 for tag in target.GetTags():
14550 if self.re.search(tag):
14551           results.append((path, tag))
14552     return results
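    # The result is a list of (path, tag) pairs, e.g. (hypothetical values)
    # [("/instances/web1.example.com", "staging"), ("/cluster", "prod")],
    # built from the /cluster, /instances/<name>, /nodes/<name> and
    # /nodegroup/<name> prefixes above.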
14555 class LUTagsSet(TagsLU):
14556 """Sets a tag on a given object.
14561 def CheckPrereq(self):
14562 """Check prerequisites.
14564 This checks the type and length of the tag name and value.
14567 TagsLU.CheckPrereq(self)
14568 for tag in self.op.tags:
14569 objects.TaggableObject.ValidateTag(tag)
14571 def Exec(self, feedback_fn):
14575     try:
14576       for tag in self.op.tags:
14577 self.target.AddTag(tag)
14578 except errors.TagError, err:
14579 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14580 self.cfg.Update(self.target, feedback_fn)
14583 class LUTagsDel(TagsLU):
14584 """Delete a list of tags from a given object.
14589 def CheckPrereq(self):
14590 """Check prerequisites.
14592 This checks that we have the given tag.
14595 TagsLU.CheckPrereq(self)
14596 for tag in self.op.tags:
14597 objects.TaggableObject.ValidateTag(tag)
14598 del_tags = frozenset(self.op.tags)
14599 cur_tags = self.target.GetTags()
14601     diff_tags = del_tags - cur_tags
14602     if diff_tags:
14603       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14604 raise errors.OpPrereqError("Tag(s) %s not found" %
14605 (utils.CommaJoin(diff_names), ),
14606 errors.ECODE_NOENT)
14608 def Exec(self, feedback_fn):
14609 """Remove the tag from the object.
14612 for tag in self.op.tags:
14613 self.target.RemoveTag(tag)
14614 self.cfg.Update(self.target, feedback_fn)
14617 class LUTestDelay(NoHooksLU):
14618 """Sleep for a specified amount of time.
14620 This LU sleeps on the master and/or nodes for a specified amount of time.
14626 def ExpandNames(self):
14627 """Expand names and set required locks.
14629 This expands the node list, if any.
14632 self.needed_locks = {}
14633 if self.op.on_nodes:
14634 # _GetWantedNodes can be used here, but it is not always appropriate to
14635 # call it this way in ExpandNames; see the LogicalUnit.ExpandNames
14636 # docstring for more information.
14637 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14638 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14640 def _TestDelay(self):
14641 """Do the actual sleep.
14644 if self.op.on_master:
14645 if not utils.TestDelay(self.op.duration):
14646 raise errors.OpExecError("Error during master delay test")
14647 if self.op.on_nodes:
14648 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14649 for node, node_result in result.items():
14650 node_result.Raise("Failure during rpc call to node %s" % node)
14652 def Exec(self, feedback_fn):
14653 """Execute the test delay opcode, with the wanted repetitions.
14656 if self.op.repeat == 0:
14659 top_value = self.op.repeat - 1
14660 for i in range(self.op.repeat):
14661 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14665 class LUTestJqueue(NoHooksLU):
14666 """Utility LU to test some aspects of the job queue.
14671 # Must be lower than default timeout for WaitForJobChange to see whether it
14672 # notices changed jobs
14673 _CLIENT_CONNECT_TIMEOUT = 20.0
14674 _CLIENT_CONFIRM_TIMEOUT = 60.0
14677 def _NotifyUsingSocket(cls, cb, errcls):
14678 """Opens a Unix socket and waits for another program to connect.
14681 @param cb: Callback to send socket name to client
14682 @type errcls: class
14683 @param errcls: Exception class to use for errors
14686 # Using a temporary directory as there's no easy way to create temporary
14687 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
14689 tmpdir = tempfile.mkdtemp()
14691 tmpsock = utils.PathJoin(tmpdir, "sock")
14693 logging.debug("Creating temporary socket at %s", tmpsock)
14694 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14699 # Send details to client
14702 # Wait for client to connect before continuing
14703 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14705 (conn, _) = sock.accept()
14706 except socket.error, err:
14707 raise errcls("Client didn't connect in time (%s)" % err)
14711 # Remove as soon as client is connected
14712 shutil.rmtree(tmpdir)
14714 # Wait for client to close
14717 # pylint: disable=E1101
14718 # Instance of '_socketobject' has no ... member
14719 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14721 except socket.error, err:
14722 raise errcls("Client failed to confirm notification (%s)" % err)
14726 def _SendNotification(self, test, arg, sockname):
14727 """Sends a notification to the client.
14730 @param test: Test name
14731 @param arg: Test argument (depends on test)
14732 @type sockname: string
14733 @param sockname: Socket path
14736 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14738 def _Notify(self, prereq, test, arg):
14739 """Notifies the client of a test.
14742 @param prereq: Whether this is a prereq-phase test
14744 @param test: Test name
14745 @param arg: Test argument (depends on test)
14749 errcls = errors.OpPrereqError
14751 errcls = errors.OpExecError
14753 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14757 def CheckArguments(self):
14758 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14759 self.expandnames_calls = 0
14761 def ExpandNames(self):
14762 checkargs_calls = getattr(self, "checkargs_calls", 0)
14763 if checkargs_calls < 1:
14764 raise errors.ProgrammerError("CheckArguments was not called")
14766 self.expandnames_calls += 1
14768 if self.op.notify_waitlock:
14769 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14771 self.LogInfo("Expanding names")
14773 # Get lock on master node (just to get a lock, not for a particular reason)
14774 self.needed_locks = {
14775 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14778 def Exec(self, feedback_fn):
14779 if self.expandnames_calls < 1:
14780 raise errors.ProgrammerError("ExpandNames was not called")
14782 if self.op.notify_exec:
14783 self._Notify(False, constants.JQT_EXEC, None)
14785 self.LogInfo("Executing")
14787 if self.op.log_messages:
14788 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14789 for idx, msg in enumerate(self.op.log_messages):
14790 self.LogInfo("Sending log message %s", idx + 1)
14791 feedback_fn(constants.JQT_MSGPREFIX + msg)
14792 # Report how many test messages have been sent
14793 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14796 raise errors.OpExecError("Opcode failure was requested")
14801 class IAllocator(object):
14802 """IAllocator framework.
14804 An IAllocator instance has three sets of attributes:
14805 - cfg that is needed to query the cluster
14806 - input data (all members of the _KEYS class attribute are required)
14807 - four buffer attributes (in_data, in_text, out_data, out_text), that represent the
14808 input (to the external script) in text and data structure format,
14809 and the output from it, again in two formats
14810 - the result variables from the script (success, info, nodes) for easy usage
14814 # pylint: disable=R0902
14815 # lots of instance attributes
14817 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14819 self.rpc = rpc_runner
14820 # init buffer variables
14821 self.in_text = self.out_text = self.in_data = self.out_data = None
14822 # init all input fields so that pylint is happy
14824 self.memory = self.disks = self.disk_template = self.spindle_use = None
14825 self.os = self.tags = self.nics = self.vcpus = None
14826 self.hypervisor = None
14827 self.relocate_from = None
14829 self.instances = None
14830 self.evac_mode = None
14831 self.target_groups = []
14833 self.required_nodes = None
14834 # init result fields
14835 self.success = self.info = self.result = None
14838 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14840 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14841 " IAllocator" % self.mode)
14843 keyset = [n for (n, _) in keydata]
14846 if key not in keyset:
14847 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14848 " IAllocator" % key)
14849 setattr(self, key, kwargs[key])
14852 if key not in kwargs:
14853 raise errors.ProgrammerError("Missing input parameter '%s' to"
14854 " IAllocator" % key)
14855 self._BuildInputData(compat.partial(fn, self), keydata)
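# A minimal construction sketch (assumed, not taken from the original code);
# every keyword must appear in the keydata list of the chosen mode, otherwise
# __init__ raises ProgrammerError. All concrete values below are hypothetical:
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com", memory=512, spindle_use=1,
#                    vcpus=1, os="debian-image", tags=[], nics=[{}],
#                    disks=[{constants.IDISK_SIZE: 1024,
#                            constants.IDISK_MODE: constants.DISK_RDWR}],
#                    disk_template=constants.DT_PLAIN,
#                    hypervisor=constants.HT_XEN_PVM)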
14857 def _ComputeClusterData(self):
14858 """Compute the generic allocator input data.
14860 This is the data that is independent of the actual operation.
14864 cluster_info = cfg.GetClusterInfo()
14867 "version": constants.IALLOCATOR_VERSION,
14868 "cluster_name": cfg.GetClusterName(),
14869 "cluster_tags": list(cluster_info.GetTags()),
14870 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14871 "ipolicy": cluster_info.ipolicy,
14873 ninfo = cfg.GetAllNodesInfo()
14874 iinfo = cfg.GetAllInstancesInfo().values()
14875 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14878 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14880 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14881 hypervisor_name = self.hypervisor
14882 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14883 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14885 hypervisor_name = cluster_info.primary_hypervisor
14887 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14890 self.rpc.call_all_instances_info(node_list,
14891 cluster_info.enabled_hypervisors)
14893 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14895 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14896 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14897 i_list, config_ndata)
14898 assert len(data["nodes"]) == len(ninfo), \
14899 "Incomplete node data computed"
14901 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14903 self.in_data = data
14906 def _ComputeNodeGroupData(cfg):
14907 """Compute node groups data.
14910 cluster = cfg.GetClusterInfo()
14911 ng = dict((guuid, {
14912 "name": gdata.name,
14913 "alloc_policy": gdata.alloc_policy,
14914 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14916 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
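# Illustrative shape of the mapping built above (UUID, group name and policy
# contents are hypothetical), keyed by node group UUID:
#   {"f4e06e0f-0000-0000-0000-000000000001":
#      {"name": "default",
#       "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
#       "ipolicy": <filled group instance policy dict>}}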
14921 def _ComputeBasicNodeData(cfg, node_cfg):
14922 """Compute global node data.
14925 @returns: a dict mapping node name to its config-derived attribute dict
14928 # fill in static (config-based) values
14929 node_results = dict((ninfo.name, {
14930 "tags": list(ninfo.GetTags()),
14931 "primary_ip": ninfo.primary_ip,
14932 "secondary_ip": ninfo.secondary_ip,
14933 "offline": ninfo.offline,
14934 "drained": ninfo.drained,
14935 "master_candidate": ninfo.master_candidate,
14936 "group": ninfo.group,
14937 "master_capable": ninfo.master_capable,
14938 "vm_capable": ninfo.vm_capable,
14939 "ndparams": cfg.GetNdParams(ninfo),
14941 for ninfo in node_cfg.values())
14943 return node_results
14946 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14948 """Compute global node data.
14950 @param node_results: the basic node structures as filled from the config
14953 #TODO(dynmem): compute the right data on MAX and MIN memory
14954 # make a copy of the current dict
14955 node_results = dict(node_results)
14956 for nname, nresult in node_data.items():
14957 assert nname in node_results, "Missing basic data for node %s" % nname
14958 ninfo = node_cfg[nname]
14960 if not (ninfo.offline or ninfo.drained):
14961 nresult.Raise("Can't get data for node %s" % nname)
14962 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14964 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14966 for attr in ["memory_total", "memory_free", "memory_dom0",
14967 "vg_size", "vg_free", "cpu_total"]:
14968 if attr not in remote_info:
14969 raise errors.OpExecError("Node '%s' didn't return attribute"
14970 " '%s'" % (nname, attr))
14971 if not isinstance(remote_info[attr], int):
14972 raise errors.OpExecError("Node '%s' returned invalid value"
14974 (nname, attr, remote_info[attr]))
14975 # compute memory used by primary instances
14976 i_p_mem = i_p_up_mem = 0
14977 for iinfo, beinfo in i_list:
14978 if iinfo.primary_node == nname:
14979 i_p_mem += beinfo[constants.BE_MAXMEM]
14980 if iinfo.name not in node_iinfo[nname].payload:
14983 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14984 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14985 remote_info["memory_free"] -= max(0, i_mem_diff)
14987 if iinfo.admin_state == constants.ADMINST_UP:
14988 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14990 # compute memory used by instances
14992 "total_memory": remote_info["memory_total"],
14993 "reserved_memory": remote_info["memory_dom0"],
14994 "free_memory": remote_info["memory_free"],
14995 "total_disk": remote_info["vg_size"],
14996 "free_disk": remote_info["vg_free"],
14997 "total_cpus": remote_info["cpu_total"],
14998 "i_pri_memory": i_p_mem,
14999 "i_pri_up_memory": i_p_up_mem,
15001 pnr_dyn.update(node_results[nname])
15002 node_results[nname] = pnr_dyn
15004 return node_results
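# Descriptive note (added): for nodes that are neither offline nor drained,
# the merged entry carries both the dynamic keys computed here
# ("total_memory", "reserved_memory", "free_memory", "total_disk",
# "free_disk", "total_cpus", "i_pri_memory", "i_pri_up_memory") and the
# static keys from _ComputeBasicNodeData ("tags", "primary_ip",
# "secondary_ip", "offline", "drained", "master_candidate", "group",
# "master_capable", "vm_capable", "ndparams"); other nodes keep only the
# static keys. The result is keyed by node name.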
15007 def _ComputeInstanceData(cluster_info, i_list):
15008 """Compute global instance data.
15012 for iinfo, beinfo in i_list:
15014 for nic in iinfo.nics:
15015 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15019 "mode": filled_params[constants.NIC_MODE],
15020 "link": filled_params[constants.NIC_LINK],
15022 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15023 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15024 nic_data.append(nic_dict)
15026 "tags": list(iinfo.GetTags()),
15027 "admin_state": iinfo.admin_state,
15028 "vcpus": beinfo[constants.BE_VCPUS],
15029 "memory": beinfo[constants.BE_MAXMEM],
15030 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15032 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15034 "disks": [{constants.IDISK_SIZE: dsk.size,
15035 constants.IDISK_MODE: dsk.mode}
15036 for dsk in iinfo.disks],
15037 "disk_template": iinfo.disk_template,
15038 "hypervisor": iinfo.hypervisor,
15040 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15042 instance_data[iinfo.name] = pir
15044 return instance_data
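# Illustrative entry (all names and sizes hypothetical); besides the keys
# shown here, each entry also carries the instance OS and the "nics" list
# built above:
#   instance_data["instance1.example.com"] == {
#       "tags": [], "admin_state": constants.ADMINST_UP, "vcpus": 1,
#       "memory": 512, "spindle_use": 1,
#       "nodes": ["node1.example.com"],
#       "disks": [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}],
#       "disk_template": constants.DT_PLAIN,
#       "hypervisor": constants.HT_XEN_PVM,
#       "disk_space_total": 1024}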
15046 def _AddNewInstance(self):
15047 """Add new instance data to allocator structure.
15049 This in combination with _ComputeClusterData will create the
15050 correct structure needed as input for the allocator.
15052 The checks for the completeness of the opcode must have already been
15056 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15058 if self.disk_template in constants.DTS_INT_MIRROR:
15059 self.required_nodes = 2
15061 self.required_nodes = 1
15065 "disk_template": self.disk_template,
15068 "vcpus": self.vcpus,
15069 "memory": self.memory,
15070 "spindle_use": self.spindle_use,
15071 "disks": self.disks,
15072 "disk_space_total": disk_space,
15074 "required_nodes": self.required_nodes,
15075 "hypervisor": self.hypervisor,
15080 def _AddRelocateInstance(self):
15081 """Add relocate instance data to allocator structure.
15083 This in combination with _ComputeClusterData will create the
15084 correct structure needed as input for the allocator.
15086 The checks for the completeness of the opcode must have already been
15090 instance = self.cfg.GetInstanceInfo(self.name)
15091 if instance is None:
15092 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15093 " IAllocator" % self.name)
15095 if instance.disk_template not in constants.DTS_MIRRORED:
15096 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15097 errors.ECODE_INVAL)
15099 if instance.disk_template in constants.DTS_INT_MIRROR and \
15100 len(instance.secondary_nodes) != 1:
15101 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15102 errors.ECODE_STATE)
15104 self.required_nodes = 1
15105 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15106 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15110 "disk_space_total": disk_space,
15111 "required_nodes": self.required_nodes,
15112 "relocate_from": self.relocate_from,
15116 def _AddNodeEvacuate(self):
15117 """Get data for node-evacuate requests.
15121 "instances": self.instances,
15122 "evac_mode": self.evac_mode,
15125 def _AddChangeGroup(self):
15126 """Get data for node-evacuate requests.
15130 "instances": self.instances,
15131 "target_groups": self.target_groups,
15134 def _BuildInputData(self, fn, keydata):
15135 """Build input data structures.
15138 self._ComputeClusterData()
15141 request["type"] = self.mode
15142 for keyname, keytype in keydata:
15143 if keyname not in request:
15144 raise errors.ProgrammerError("Request parameter %s is missing" %
15146 val = request[keyname]
15147 if not keytype(val):
15148 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15149 " validation, value %s, expected"
15150 " type %s" % (keyname, val, keytype))
15151 self.in_data["request"] = request
15153 self.in_text = serializer.Dump(self.in_data)
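# Sketch of the final document (descriptive, added): self.in_data now holds
# the cluster-wide keys from _ComputeClusterData ("version", "cluster_name",
# "cluster_tags", "enabled_hypervisors", "ipolicy", "nodegroups", "nodes",
# "instances") plus the per-mode "request" dict validated above, whose "type"
# is the iallocator mode; serializer.Dump turns the whole structure into the
# text handed to the external allocator script.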
15155 _STRING_LIST = ht.TListOf(ht.TString)
15156 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15157 # pylint: disable=E1101
15158 # Class '...' has no 'OP_ID' member
15159 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15160 opcodes.OpInstanceMigrate.OP_ID,
15161 opcodes.OpInstanceReplaceDisks.OP_ID])
15165 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15166 ht.TItems([ht.TNonEmptyString,
15167 ht.TNonEmptyString,
15168 ht.TListOf(ht.TNonEmptyString),
15171 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15172 ht.TItems([ht.TNonEmptyString,
15175 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15176 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15179 constants.IALLOCATOR_MODE_ALLOC:
15182 ("name", ht.TString),
15183 ("memory", ht.TInt),
15184 ("spindle_use", ht.TInt),
15185 ("disks", ht.TListOf(ht.TDict)),
15186 ("disk_template", ht.TString),
15187 ("os", ht.TString),
15188 ("tags", _STRING_LIST),
15189 ("nics", ht.TListOf(ht.TDict)),
15190 ("vcpus", ht.TInt),
15191 ("hypervisor", ht.TString),
15193 constants.IALLOCATOR_MODE_RELOC:
15194 (_AddRelocateInstance,
15195 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15197 constants.IALLOCATOR_MODE_NODE_EVAC:
15198 (_AddNodeEvacuate, [
15199 ("instances", _STRING_LIST),
15200 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15202 constants.IALLOCATOR_MODE_CHG_GROUP:
15203 (_AddChangeGroup, [
15204 ("instances", _STRING_LIST),
15205 ("target_groups", _STRING_LIST),
15209 def Run(self, name, validate=True, call_fn=None):
15210 """Run an instance allocator and return the results.
15213 if call_fn is None:
15214 call_fn = self.rpc.call_iallocator_runner
15216 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15217 result.Raise("Failure while running the iallocator script")
15219 self.out_text = result.payload
15221 self._ValidateResult()
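# A hedged usage sketch (the opcode field name below is an assumption, not
# taken from this section): callers typically follow Run() with a success
# check, e.g.
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute solution for request: %s" %
#                                ial.info, errors.ECODE_NORES)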
15223 def _ValidateResult(self):
15224 """Process the allocator results.
15226 This will process the result and, if successful, save it in
15227 self.out_data and the other parameters.
15231 rdict = serializer.Load(self.out_text)
15232 except Exception, err:
15233 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15235 if not isinstance(rdict, dict):
15236 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15238 # TODO: remove backwards compatibility in later versions
15239 if "nodes" in rdict and "result" not in rdict:
15240 rdict["result"] = rdict["nodes"]
15243 for key in "success", "info", "result":
15244 if key not in rdict:
15245 raise errors.OpExecError("Can't parse iallocator results:"
15246 " missing key '%s'" % key)
15247 setattr(self, key, rdict[key])
15249 if not self._result_check(self.result):
15250 raise errors.OpExecError("Iallocator returned invalid result,"
15251 " expected %s, got %s" %
15252 (self._result_check, self.result),
15253 errors.ECODE_INVAL)
15255 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15256 assert self.relocate_from is not None
15257 assert self.required_nodes == 1
15259 node2group = dict((name, ndata["group"])
15260 for (name, ndata) in self.in_data["nodes"].items())
15262 fn = compat.partial(self._NodesToGroups, node2group,
15263 self.in_data["nodegroups"])
15265 instance = self.cfg.GetInstanceInfo(self.name)
15266 request_groups = fn(self.relocate_from + [instance.primary_node])
15267 result_groups = fn(rdict["result"] + [instance.primary_node])
15269 if self.success and not set(result_groups).issubset(request_groups):
15270 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15271 " differ from original groups (%s)" %
15272 (utils.CommaJoin(result_groups),
15273 utils.CommaJoin(request_groups)))
15275 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15276 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15278 self.out_data = rdict
15281 def _NodesToGroups(node2group, groups, nodes):
15282 """Returns a list of unique group names for a list of nodes.
15284 @type node2group: dict
15285 @param node2group: Map from node name to group UUID
15287 @param groups: Group information
15289 @param nodes: Node names
15296 group_uuid = node2group[node]
15298 # Ignore unknown node
15302 group = groups[group_uuid]
15304 # Can't find group, let's use UUID
15305 group_name = group_uuid
15307 group_name = group["name"]
15309 result.add(group_name)
15311 return sorted(result)
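# Illustrative call (node and group names hypothetical):
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "group1"}},
#                             ["node1", "node2", "node3"])
# returns ["group1", "uuid-b"]: nodes missing from the mapping are ignored
# and groups missing from the group info fall back to their UUID.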
15314 class LUTestAllocator(NoHooksLU):
15315 """Run allocator tests.
15317 This LU runs the allocator tests
15320 def CheckPrereq(self):
15321 """Check prerequisites.
15323 This checks the opcode parameters depending on the direction and mode of the test.
15326 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15327 for attr in ["memory", "disks", "disk_template",
15328 "os", "tags", "nics", "vcpus"]:
15329 if not hasattr(self.op, attr):
15330 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15331 attr, errors.ECODE_INVAL)
15332 iname = self.cfg.ExpandInstanceName(self.op.name)
15333 if iname is not None:
15334 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15335 iname, errors.ECODE_EXISTS)
15336 if not isinstance(self.op.nics, list):
15337 raise errors.OpPrereqError("Invalid parameter 'nics'",
15338 errors.ECODE_INVAL)
15339 if not isinstance(self.op.disks, list):
15340 raise errors.OpPrereqError("Invalid parameter 'disks'",
15341 errors.ECODE_INVAL)
15342 for row in self.op.disks:
15343 if (not isinstance(row, dict) or
15344 constants.IDISK_SIZE not in row or
15345 not isinstance(row[constants.IDISK_SIZE], int) or
15346 constants.IDISK_MODE not in row or
15347 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15348 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15349 " parameter", errors.ECODE_INVAL)
15350 if self.op.hypervisor is None:
15351 self.op.hypervisor = self.cfg.GetHypervisorType()
15352 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15353 fname = _ExpandInstanceName(self.cfg, self.op.name)
15354 self.op.name = fname
15355 self.relocate_from = \
15356 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15357 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15358 constants.IALLOCATOR_MODE_NODE_EVAC):
15359 if not self.op.instances:
15360 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15361 self.op.instances = _GetWantedInstances(self, self.op.instances)
15363 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15364 self.op.mode, errors.ECODE_INVAL)
15366 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15367 if self.op.allocator is None:
15368 raise errors.OpPrereqError("Missing allocator name",
15369 errors.ECODE_INVAL)
15370 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15371 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15372 self.op.direction, errors.ECODE_INVAL)
15374 def Exec(self, feedback_fn):
15375 """Run the allocator test.
15378 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15379 ial = IAllocator(self.cfg, self.rpc,
15382 memory=self.op.memory,
15383 disks=self.op.disks,
15384 disk_template=self.op.disk_template,
15388 vcpus=self.op.vcpus,
15389 hypervisor=self.op.hypervisor,
15391 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15392 ial = IAllocator(self.cfg, self.rpc,
15395 relocate_from=list(self.relocate_from),
15397 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15398 ial = IAllocator(self.cfg, self.rpc,
15400 instances=self.op.instances,
15401 target_groups=self.op.target_groups)
15402 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15403 ial = IAllocator(self.cfg, self.rpc,
15405 instances=self.op.instances,
15406 evac_mode=self.op.evac_mode)
15408 raise errors.ProgrammerError("Uncatched mode %s in"
15409 " LUTestAllocator.Exec", self.op.mode)
15411 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15412 result = ial.in_text
15414 ial.Run(self.op.allocator, validate=False)
15415 result = ial.out_text
15419 #: Query type implementations
15421 constants.QR_CLUSTER: _ClusterQuery,
15422 constants.QR_INSTANCE: _InstanceQuery,
15423 constants.QR_NODE: _NodeQuery,
15424 constants.QR_GROUP: _GroupQuery,
15425 constants.QR_OS: _OsQuery,
15426 constants.QR_EXPORT: _ExportQuery,
15429 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15432 def _GetQueryImplementation(name):
15433 """Returns the implemtnation for a query type.
15435 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15439 return _QUERY_IMPL[name]
15441 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15442 errors.ECODE_INVAL)
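# Hedged usage sketch: query-handling LUs resolve the backing implementation
# by resource name, e.g.
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
# while an unknown name raises OpPrereqError with ECODE_INVAL, as above.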