code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _MakeLegacyNodeInfo(data):
 589   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 590
 591   Converts the data into a single dictionary. This is fine for most use cases,
 592   but some require information from more than one volume group or hypervisor.
 593
 594   """
 595   (bootid, (vg_info, ), (hv_info, )) = data
 596
 597   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 598     "bootid": bootid,
 599     })
 600
 601
 602 def _AnnotateDiskParams(instance, devs, cfg):
 603   """Little helper wrapper to the rpc annotation method.
 604
 605   @param instance: The instance object
 606   @type devs: List of L{objects.Disk}
 607   @param devs: The root devices (not any of its children!)
 608   @param cfg: The config object
 609   @returns The annotated disk copies
 610   @see L{rpc.AnnotateDiskParams}
 611
 612   """
 613   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 614                                 cfg.GetInstanceDiskParams(instance))
 615
 616
 617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 618                               cur_group_uuid):
 619   """Checks if node groups for locked instances are still correct.
 620
 621   @type cfg: L{config.ConfigWriter}
 622   @param cfg: Cluster configuration
 623   @type instances: dict; string as key, L{objects.Instance} as value
 624   @param instances: Dictionary, instance name as key, instance object as value
 625   @type owned_groups: iterable of string
 626   @param owned_groups: List of owned groups
 627   @type owned_nodes: iterable of string
 628   @param owned_nodes: List of owned nodes
 629   @type cur_group_uuid: string or None
 630   @param cur_group_uuid: Optional group UUID to check against instance's groups
 631
 632   """
 633   for (name, inst) in instances.items():
 634     assert owned_nodes.issuperset(inst.all_nodes), \
 635       "Instance %s's nodes changed while we kept the lock" % name
 636
 637     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 638
 639     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 640       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 641
 642
 643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 644   """Checks if the owned node groups are still correct for an instance.
 645
 646   @type cfg: L{config.ConfigWriter}
 647   @param cfg: The cluster configuration
 648   @type instance_name: string
 649   @param instance_name: Instance name
 650   @type owned_groups: set or frozenset
 651   @param owned_groups: List of currently owned node groups
 652
 653   """
 654   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 655
 656   if not owned_groups.issuperset(inst_groups):
 657     raise errors.OpPrereqError("Instance %s's node groups changed since"
 658                                " locks were acquired, current groups are"
 659                                " are '%s', owning groups '%s'; retry the"
 660                                " operation" %
 661                                (instance_name,
 662                                 utils.CommaJoin(inst_groups),
 663                                 utils.CommaJoin(owned_groups)),
 664                                errors.ECODE_STATE)
 665
 666   return inst_groups
 667
 668
 669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 670   """Checks if the instances in a node group are still correct.
 671
 672   @type cfg: L{config.ConfigWriter}
 673   @param cfg: The cluster configuration
 674   @type group_uuid: string
 675   @param group_uuid: Node group UUID
 676   @type owned_instances: set or frozenset
 677   @param owned_instances: List of currently owned instances
 678
 679   """
 680   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 681   if owned_instances != wanted_instances:
 682     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 683                                " locks were acquired, wanted '%s', have '%s';"
 684                                " retry the operation" %
 685                                (group_uuid,
 686                                 utils.CommaJoin(wanted_instances),
 687                                 utils.CommaJoin(owned_instances)),
 688                                errors.ECODE_STATE)
 689
 690   return wanted_instances
 691
 692
 693 def _SupportsOob(cfg, node):
 694   """Tells if node supports OOB.
 695
 696   @type cfg: L{config.ConfigWriter}
 697   @param cfg: The cluster configuration
 698   @type node: L{objects.Node}
 699   @param node: The node
 700   @return: The OOB script if supported or an empty string otherwise
 701
 702   """
 703   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 704
 705
 706 def _GetWantedNodes(lu, nodes):
 707   """Returns list of checked and expanded node names.
 708
 709   @type lu: L{LogicalUnit}
 710   @param lu: the logical unit on whose behalf we execute
 711   @type nodes: list
 712   @param nodes: list of node names or None for all nodes
 713   @rtype: list
 714   @return: the list of nodes, sorted
 715   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 716
 717   """
 718   if nodes:
 719     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 720
 721   return utils.NiceSort(lu.cfg.GetNodeList())
 722
 723
 724 def _GetWantedInstances(lu, instances):
 725   """Returns list of checked and expanded instance names.
 726
 727   @type lu: L{LogicalUnit}
 728   @param lu: the logical unit on whose behalf we execute
 729   @type instances: list
 730   @param instances: list of instance names or None for all instances
 731   @rtype: list
 732   @return: the list of instances, sorted
 733   @raise errors.OpPrereqError: if the instances parameter is wrong type
 734   @raise errors.OpPrereqError: if any of the passed instances is not found
 735
 736   """
 737   if instances:
 738     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 739   else:
 740     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 741   return wanted
 742
 743
 744 def _GetUpdatedParams(old_params, update_dict,
 745                       use_default=True, use_none=False):
 746   """Return the new version of a parameter dictionary.
 747
 748   @type old_params: dict
 749   @param old_params: old parameters
 750   @type update_dict: dict
 751   @param update_dict: dict containing new parameter values, or
 752       constants.VALUE_DEFAULT to reset the parameter to its default
 753       value
 754   @param use_default: boolean
 755   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 756       values as 'to be deleted' values
 757   @param use_none: boolean
 758   @type use_none: whether to recognise C{None} values as 'to be
 759       deleted' values
 760   @rtype: dict
 761   @return: the new parameter dictionary
 762
 763   """
 764   params_copy = copy.deepcopy(old_params)
 765   for key, val in update_dict.iteritems():
 766     if ((use_default and val == constants.VALUE_DEFAULT) or
 767         (use_none and val is None)):
 768       try:
 769         del params_copy[key]
 770       except KeyError:
 771         pass
 772     else:
 773       params_copy[key] = val
 774   return params_copy
 775
 776
 777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 778   """Return the new version of a instance policy.
 779
 780   @param group_policy: whether this policy applies to a group and thus
 781     we should support removal of policy entries
 782
 783   """
 784   use_none = use_default = group_policy
 785   ipolicy = copy.deepcopy(old_ipolicy)
 786   for key, value in new_ipolicy.items():
 787     if key not in constants.IPOLICY_ALL_KEYS:
 788       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 789                                  errors.ECODE_INVAL)
 790     if key in constants.IPOLICY_ISPECS:
 791       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 792       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 793                                        use_none=use_none,
 794                                        use_default=use_default)
 795     else:
 796       if not value or value == [constants.VALUE_DEFAULT]:
 797         if group_policy:
 798           del ipolicy[key]
 799         else:
 800           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 801                                      " on the cluster'" % key,
 802                                      errors.ECODE_INVAL)
 803       else:
 804         if key in constants.IPOLICY_PARAMETERS:
 805           # FIXME: we assume all such values are float
 806           try:
 807             ipolicy[key] = float(value)
 808           except (TypeError, ValueError), err:
 809             raise errors.OpPrereqError("Invalid value for attribute"
 810                                        " '%s': '%s', error: %s" %
 811                                        (key, value, err), errors.ECODE_INVAL)
 812         else:
 813           # FIXME: we assume all others are lists; this should be redone
 814           # in a nicer way
 815           ipolicy[key] = list(value)
 816   try:
 817     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 818   except errors.ConfigurationError, err:
 819     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 820                                errors.ECODE_INVAL)
 821   return ipolicy
 822
 823
 824 def _UpdateAndVerifySubDict(base, updates, type_check):
 825   """Updates and verifies a dict with sub dicts of the same type.
 826
 827   @param base: The dict with the old data
 828   @param updates: The dict with the new data
 829   @param type_check: Dict suitable to ForceDictType to verify correct types
 830   @returns: A new dict with updated and verified values
 831
 832   """
 833   def fn(old, value):
 834     new = _GetUpdatedParams(old, value)
 835     utils.ForceDictType(new, type_check)
 836     return new
 837
 838   ret = copy.deepcopy(base)
 839   ret.update(dict((key, fn(base.get(key, {}), value))
 840                   for key, value in updates.items()))
 841   return ret
 842
 843
 844 def _MergeAndVerifyHvState(op_input, obj_input):
 845   """Combines the hv state from an opcode with the one of the object
 846
 847   @param op_input: The input dict from the opcode
 848   @param obj_input: The input dict from the objects
 849   @return: The verified and updated dict
 850
 851   """
 852   if op_input:
 853     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 854     if invalid_hvs:
 855       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 856                                  " %s" % utils.CommaJoin(invalid_hvs),
 857                                  errors.ECODE_INVAL)
 858     if obj_input is None:
 859       obj_input = {}
 860     type_check = constants.HVSTS_PARAMETER_TYPES
 861     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 862
 863   return None
 864
 865
 866 def _MergeAndVerifyDiskState(op_input, obj_input):
 867   """Combines the disk state from an opcode with the one of the object
 868
 869   @param op_input: The input dict from the opcode
 870   @param obj_input: The input dict from the objects
 871   @return: The verified and updated dict
 872   """
 873   if op_input:
 874     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 875     if invalid_dst:
 876       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 877                                  utils.CommaJoin(invalid_dst),
 878                                  errors.ECODE_INVAL)
 879     type_check = constants.DSS_PARAMETER_TYPES
 880     if obj_input is None:
 881       obj_input = {}
 882     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 883                                               type_check))
 884                 for key, value in op_input.items())
 885
 886   return None
 887
 888
 889 def _ReleaseLocks(lu, level, names=None, keep=None):
 890   """Releases locks owned by an LU.
 891
 892   @type lu: L{LogicalUnit}
 893   @param level: Lock level
 894   @type names: list or None
 895   @param names: Names of locks to release
 896   @type keep: list or None
 897   @param keep: Names of locks to retain
 898
 899   """
 900   assert not (keep is not None and names is not None), \
 901          "Only one of the 'names' and the 'keep' parameters can be given"
 902
 903   if names is not None:
 904     should_release = names.__contains__
 905   elif keep:
 906     should_release = lambda name: name not in keep
 907   else:
 908     should_release = None
 909
 910   owned = lu.owned_locks(level)
 911   if not owned:
 912     # Not owning any lock at this level, do nothing
 913     pass
 914
 915   elif should_release:
 916     retain = []
 917     release = []
 918
 919     # Determine which locks to release
 920     for name in owned:
 921       if should_release(name):
 922         release.append(name)
 923       else:
 924         retain.append(name)
 925
 926     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 927
 928     # Release just some locks
 929     lu.glm.release(level, names=release)
 930
 931     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 932   else:
 933     # Release everything
 934     lu.glm.release(level)
 935
 936     assert not lu.glm.is_owned(level), "No locks should be owned"
 937
 938
 939 def _MapInstanceDisksToNodes(instances):
 940   """Creates a map from (node, volume) to instance name.
 941
 942   @type instances: list of L{objects.Instance}
 943   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 944
 945   """
 946   return dict(((node, vol), inst.name)
 947               for inst in instances
 948               for (node, vols) in inst.MapLVsByNode().items()
 949               for vol in vols)
 950
 951
 952 def _RunPostHook(lu, node_name):
 953   """Runs the post-hook for an opcode on a single node.
 954
 955   """
 956   hm = lu.proc.BuildHooksManager(lu)
 957   try:
 958     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 959   except:
 960     # pylint: disable=W0702
 961     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 962
 963
 964 def _CheckOutputFields(static, dynamic, selected):
 965   """Checks whether all selected fields are valid.
 966
 967   @type static: L{utils.FieldSet}
 968   @param static: static fields set
 969   @type dynamic: L{utils.FieldSet}
 970   @param dynamic: dynamic fields set
 971
 972   """
 973   f = utils.FieldSet()
 974   f.Extend(static)
 975   f.Extend(dynamic)
 976
 977   delta = f.NonMatching(selected)
 978   if delta:
 979     raise errors.OpPrereqError("Unknown output fields selected: %s"
 980                                % ",".join(delta), errors.ECODE_INVAL)
 981
 982
 983 def _CheckGlobalHvParams(params):
 984   """Validates that given hypervisor params are not global ones.
 985
 986   This will ensure that instances don't get customised versions of
 987   global params.
 988
 989   """
 990   used_globals = constants.HVC_GLOBALS.intersection(params)
 991   if used_globals:
 992     msg = ("The following hypervisor parameters are global and cannot"
 993            " be customized at instance level, please modify them at"
 994            " cluster level: %s" % utils.CommaJoin(used_globals))
 995     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 996
 997
 998 def _CheckNodeOnline(lu, node, msg=None):
 999   """Ensure that a given node is online.
1000
1001   @param lu: the LU on behalf of which we make the check
1002   @param node: the node to check
1003   @param msg: if passed, should be a message to replace the default one
1004   @raise errors.OpPrereqError: if the node is offline
1005
1006   """
1007   if msg is None:
1008     msg = "Can't use offline node"
1009   if lu.cfg.GetNodeInfo(node).offline:
1010     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1011
1012
1013 def _CheckNodeNotDrained(lu, node):
1014   """Ensure that a given node is not drained.
1015
1016   @param lu: the LU on behalf of which we make the check
1017   @param node: the node to check
1018   @raise errors.OpPrereqError: if the node is drained
1019
1020   """
1021   if lu.cfg.GetNodeInfo(node).drained:
1022     raise errors.OpPrereqError("Can't use drained node %s" % node,
1023                                errors.ECODE_STATE)
1024
1025
1026 def _CheckNodeVmCapable(lu, node):
1027   """Ensure that a given node is vm capable.
1028
1029   @param lu: the LU on behalf of which we make the check
1030   @param node: the node to check
1031   @raise errors.OpPrereqError: if the node is not vm capable
1032
1033   """
1034   if not lu.cfg.GetNodeInfo(node).vm_capable:
1035     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1036                                errors.ECODE_STATE)
1037
1038
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040   """Ensure that a node supports a given OS.
1041
1042   @param lu: the LU on behalf of which we make the check
1043   @param node: the node to check
1044   @param os_name: the OS to query about
1045   @param force_variant: whether to ignore variant errors
1046   @raise errors.OpPrereqError: if the node is not supporting the OS
1047
1048   """
1049   result = lu.rpc.call_os_get(node, os_name)
1050   result.Raise("OS '%s' not in supported OS list for node %s" %
1051                (os_name, node),
1052                prereq=True, ecode=errors.ECODE_INVAL)
1053   if not force_variant:
1054     _CheckOSVariant(result.payload, os_name)
1055
1056
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058   """Ensure that a node has the given secondary ip.
1059
1060   @type lu: L{LogicalUnit}
1061   @param lu: the LU on behalf of which we make the check
1062   @type node: string
1063   @param node: the node to check
1064   @type secondary_ip: string
1065   @param secondary_ip: the ip to check
1066   @type prereq: boolean
1067   @param prereq: whether to throw a prerequisite or an execute error
1068   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1070
1071   """
1072   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073   result.Raise("Failure checking secondary ip on node %s" % node,
1074                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075   if not result.payload:
1076     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077            " please fix and re-run this command" % secondary_ip)
1078     if prereq:
1079       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1080     else:
1081       raise errors.OpExecError(msg)
1082
1083
1084 def _GetClusterDomainSecret():
1085   """Reads the cluster domain secret.
1086
1087   """
1088   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1089                                strict=True)
1090
1091
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093   """Ensure that an instance is in one of the required states.
1094
1095   @param lu: the LU on behalf of which we make the check
1096   @param instance: the instance to check
1097   @param msg: if passed, should be a message to replace the default one
1098   @raise errors.OpPrereqError: if the instance is not in the required state
1099
1100   """
1101   if msg is None:
1102     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103   if instance.admin_state not in req_states:
1104     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105                                (instance.name, instance.admin_state, msg),
1106                                errors.ECODE_STATE)
1107
1108   if constants.ADMINST_UP not in req_states:
1109     pnode = instance.primary_node
1110     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1111     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1112                 prereq=True, ecode=errors.ECODE_ENVIRON)
1113
1114     if instance.name in ins_l.payload:
1115       raise errors.OpPrereqError("Instance %s is running, %s" %
1116                                  (instance.name, msg), errors.ECODE_STATE)
1117
1118
1119 def _ComputeMinMaxSpec(name, ipolicy, value):
1120   """Computes if value is in the desired range.
1121
1122   @param name: name of the parameter for which we perform the check
1123   @param ipolicy: dictionary containing min, max and std values
1124   @param value: actual value that we want to use
1125   @return: None or element not meeting the criteria
1126
1127
1128   """
1129   if value in [None, constants.VALUE_AUTO]:
1130     return None
1131   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1132   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1133   if value > max_v or min_v > value:
1134     return ("%s value %s is not in range [%s, %s]" %
1135             (name, value, min_v, max_v))
1136   return None
1137
1138
1139 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1140                                  nic_count, disk_sizes, spindle_use,
1141                                  _compute_fn=_ComputeMinMaxSpec):
1142   """Verifies ipolicy against provided specs.
1143
1144   @type ipolicy: dict
1145   @param ipolicy: The ipolicy
1146   @type mem_size: int
1147   @param mem_size: The memory size
1148   @type cpu_count: int
1149   @param cpu_count: Used cpu cores
1150   @type disk_count: int
1151   @param disk_count: Number of disks used
1152   @type nic_count: int
1153   @param nic_count: Number of nics used
1154   @type disk_sizes: list of ints
1155   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1156   @type spindle_use: int
1157   @param spindle_use: The number of spindles this instance uses
1158   @param _compute_fn: The compute function (unittest only)
1159   @return: A list of violations, or an empty list of no violations are found
1160
1161   """
1162   assert disk_count == len(disk_sizes)
1163
1164   test_settings = [
1165     (constants.ISPEC_MEM_SIZE, mem_size),
1166     (constants.ISPEC_CPU_COUNT, cpu_count),
1167     (constants.ISPEC_DISK_COUNT, disk_count),
1168     (constants.ISPEC_NIC_COUNT, nic_count),
1169     (constants.ISPEC_SPINDLE_USE, spindle_use),
1170     ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1171
1172   return filter(None,
1173                 (_compute_fn(name, ipolicy, value)
1174                  for (name, value) in test_settings))
1175
1176
1177 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1178                                      _compute_fn=_ComputeIPolicySpecViolation):
1179   """Compute if instance meets the specs of ipolicy.
1180
1181   @type ipolicy: dict
1182   @param ipolicy: The ipolicy to verify against
1183   @type instance: L{objects.Instance}
1184   @param instance: The instance to verify
1185   @param _compute_fn: The function to verify ipolicy (unittest only)
1186   @see: L{_ComputeIPolicySpecViolation}
1187
1188   """
1189   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1190   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1191   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1192   disk_count = len(instance.disks)
1193   disk_sizes = [disk.size for disk in instance.disks]
1194   nic_count = len(instance.nics)
1195
1196   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1197                      disk_sizes, spindle_use)
1198
1199
1200 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1201     _compute_fn=_ComputeIPolicySpecViolation):
1202   """Compute if instance specs meets the specs of ipolicy.
1203
1204   @type ipolicy: dict
1205   @param ipolicy: The ipolicy to verify against
1206   @param instance_spec: dict
1207   @param instance_spec: The instance spec to verify
1208   @param _compute_fn: The function to verify ipolicy (unittest only)
1209   @see: L{_ComputeIPolicySpecViolation}
1210
1211   """
1212   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1213   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1214   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1215   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1216   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1217   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1218
1219   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1220                      disk_sizes, spindle_use)
1221
1222
1223 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1224                                  target_group,
1225                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1226   """Compute if instance meets the specs of the new target group.
1227
1228   @param ipolicy: The ipolicy to verify
1229   @param instance: The instance object to verify
1230   @param current_group: The current group of the instance
1231   @param target_group: The new group of the instance
1232   @param _compute_fn: The function to verify ipolicy (unittest only)
1233   @see: L{_ComputeIPolicySpecViolation}
1234
1235   """
1236   if current_group == target_group:
1237     return []
1238   else:
1239     return _compute_fn(ipolicy, instance)
1240
1241
1242 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1243                             _compute_fn=_ComputeIPolicyNodeViolation):
1244   """Checks that the target node is correct in terms of instance policy.
1245
1246   @param ipolicy: The ipolicy to verify
1247   @param instance: The instance object to verify
1248   @param node: The new node to relocate
1249   @param ignore: Ignore violations of the ipolicy
1250   @param _compute_fn: The function to verify ipolicy (unittest only)
1251   @see: L{_ComputeIPolicySpecViolation}
1252
1253   """
1254   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1255   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1256
1257   if res:
1258     msg = ("Instance does not meet target node group's (%s) instance"
1259            " policy: %s") % (node.group, utils.CommaJoin(res))
1260     if ignore:
1261       lu.LogWarning(msg)
1262     else:
1263       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1264
1265
1266 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1267   """Computes a set of any instances that would violate the new ipolicy.
1268
1269   @param old_ipolicy: The current (still in-place) ipolicy
1270   @param new_ipolicy: The new (to become) ipolicy
1271   @param instances: List of instances to verify
1272   @return: A list of instances which violates the new ipolicy but did not before
1273
1274   """
1275   return (_ComputeViolatingInstances(old_ipolicy, instances) -
1276           _ComputeViolatingInstances(new_ipolicy, instances))
1277
1278
1279 def _ExpandItemName(fn, name, kind):
1280   """Expand an item name.
1281
1282   @param fn: the function to use for expansion
1283   @param name: requested item name
1284   @param kind: text description ('Node' or 'Instance')
1285   @return: the resolved (full) name
1286   @raise errors.OpPrereqError: if the item is not found
1287
1288   """
1289   full_name = fn(name)
1290   if full_name is None:
1291     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1292                                errors.ECODE_NOENT)
1293   return full_name
1294
1295
1296 def _ExpandNodeName(cfg, name):
1297   """Wrapper over L{_ExpandItemName} for nodes."""
1298   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1299
1300
1301 def _ExpandInstanceName(cfg, name):
1302   """Wrapper over L{_ExpandItemName} for instance."""
1303   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1304
1305
1306 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1307                           minmem, maxmem, vcpus, nics, disk_template, disks,
1308                           bep, hvp, hypervisor_name, tags):
1309   """Builds instance related env variables for hooks
1310
1311   This builds the hook environment from individual variables.
1312
1313   @type name: string
1314   @param name: the name of the instance
1315   @type primary_node: string
1316   @param primary_node: the name of the instance's primary node
1317   @type secondary_nodes: list
1318   @param secondary_nodes: list of secondary nodes as strings
1319   @type os_type: string
1320   @param os_type: the name of the instance's OS
1321   @type status: string
1322   @param status: the desired status of the instance
1323   @type minmem: string
1324   @param minmem: the minimum memory size of the instance
1325   @type maxmem: string
1326   @param maxmem: the maximum memory size of the instance
1327   @type vcpus: string
1328   @param vcpus: the count of VCPUs the instance has
1329   @type nics: list
1330   @param nics: list of tuples (ip, mac, mode, link) representing
1331       the NICs the instance has
1332   @type disk_template: string
1333   @param disk_template: the disk template of the instance
1334   @type disks: list
1335   @param disks: the list of (size, mode) pairs
1336   @type bep: dict
1337   @param bep: the backend parameters for the instance
1338   @type hvp: dict
1339   @param hvp: the hypervisor parameters for the instance
1340   @type hypervisor_name: string
1341   @param hypervisor_name: the hypervisor for the instance
1342   @type tags: list
1343   @param tags: list of instance tags as strings
1344   @rtype: dict
1345   @return: the hook environment for this instance
1346
1347   """
1348   env = {
1349     "OP_TARGET": name,
1350     "INSTANCE_NAME": name,
1351     "INSTANCE_PRIMARY": primary_node,
1352     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1353     "INSTANCE_OS_TYPE": os_type,
1354     "INSTANCE_STATUS": status,
1355     "INSTANCE_MINMEM": minmem,
1356     "INSTANCE_MAXMEM": maxmem,
1357     # TODO(2.7) remove deprecated "memory" value
1358     "INSTANCE_MEMORY": maxmem,
1359     "INSTANCE_VCPUS": vcpus,
1360     "INSTANCE_DISK_TEMPLATE": disk_template,
1361     "INSTANCE_HYPERVISOR": hypervisor_name,
1362   }
1363   if nics:
1364     nic_count = len(nics)
1365     for idx, (ip, mac, mode, link) in enumerate(nics):
1366       if ip is None:
1367         ip = ""
1368       env["INSTANCE_NIC%d_IP" % idx] = ip
1369       env["INSTANCE_NIC%d_MAC" % idx] = mac
1370       env["INSTANCE_NIC%d_MODE" % idx] = mode
1371       env["INSTANCE_NIC%d_LINK" % idx] = link
1372       if mode == constants.NIC_MODE_BRIDGED:
1373         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1374   else:
1375     nic_count = 0
1376
1377   env["INSTANCE_NIC_COUNT"] = nic_count
1378
1379   if disks:
1380     disk_count = len(disks)
1381     for idx, (size, mode) in enumerate(disks):
1382       env["INSTANCE_DISK%d_SIZE" % idx] = size
1383       env["INSTANCE_DISK%d_MODE" % idx] = mode
1384   else:
1385     disk_count = 0
1386
1387   env["INSTANCE_DISK_COUNT"] = disk_count
1388
1389   if not tags:
1390     tags = []
1391
1392   env["INSTANCE_TAGS"] = " ".join(tags)
1393
1394   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1395     for key, value in source.items():
1396       env["INSTANCE_%s_%s" % (kind, key)] = value
1397
1398   return env
1399
1400
1401 def _NICListToTuple(lu, nics):
1402   """Build a list of nic information tuples.
1403
1404   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1405   value in LUInstanceQueryData.
1406
1407   @type lu:  L{LogicalUnit}
1408   @param lu: the logical unit on whose behalf we execute
1409   @type nics: list of L{objects.NIC}
1410   @param nics: list of nics to convert to hooks tuples
1411
1412   """
1413   hooks_nics = []
1414   cluster = lu.cfg.GetClusterInfo()
1415   for nic in nics:
1416     ip = nic.ip
1417     mac = nic.mac
1418     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1419     mode = filled_params[constants.NIC_MODE]
1420     link = filled_params[constants.NIC_LINK]
1421     hooks_nics.append((ip, mac, mode, link))
1422   return hooks_nics
1423
1424
1425 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1426   """Builds instance related env variables for hooks from an object.
1427
1428   @type lu: L{LogicalUnit}
1429   @param lu: the logical unit on whose behalf we execute
1430   @type instance: L{objects.Instance}
1431   @param instance: the instance for which we should build the
1432       environment
1433   @type override: dict
1434   @param override: dictionary with key/values that will override
1435       our values
1436   @rtype: dict
1437   @return: the hook environment dictionary
1438
1439   """
1440   cluster = lu.cfg.GetClusterInfo()
1441   bep = cluster.FillBE(instance)
1442   hvp = cluster.FillHV(instance)
1443   args = {
1444     "name": instance.name,
1445     "primary_node": instance.primary_node,
1446     "secondary_nodes": instance.secondary_nodes,
1447     "os_type": instance.os,
1448     "status": instance.admin_state,
1449     "maxmem": bep[constants.BE_MAXMEM],
1450     "minmem": bep[constants.BE_MINMEM],
1451     "vcpus": bep[constants.BE_VCPUS],
1452     "nics": _NICListToTuple(lu, instance.nics),
1453     "disk_template": instance.disk_template,
1454     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1455     "bep": bep,
1456     "hvp": hvp,
1457     "hypervisor_name": instance.hypervisor,
1458     "tags": instance.tags,
1459   }
1460   if override:
1461     args.update(override)
1462   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1463
1464
1465 def _AdjustCandidatePool(lu, exceptions):
1466   """Adjust the candidate pool after node operations.
1467
1468   """
1469   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1470   if mod_list:
1471     lu.LogInfo("Promoted nodes to master candidate role: %s",
1472                utils.CommaJoin(node.name for node in mod_list))
1473     for name in mod_list:
1474       lu.context.ReaddNode(name)
1475   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1476   if mc_now > mc_max:
1477     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1478                (mc_now, mc_max))
1479
1480
1481 def _DecideSelfPromotion(lu, exceptions=None):
1482   """Decide whether I should promote myself as a master candidate.
1483
1484   """
1485   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1486   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1487   # the new node will increase mc_max with one, so:
1488   mc_should = min(mc_should + 1, cp_size)
1489   return mc_now < mc_should
1490
1491
1492 def _CalculateGroupIPolicy(cluster, group):
1493   """Calculate instance policy for group.
1494
1495   """
1496   return cluster.SimpleFillIPolicy(group.ipolicy)
1497
1498
1499 def _ComputeViolatingInstances(ipolicy, instances):
1500   """Computes a set of instances who violates given ipolicy.
1501
1502   @param ipolicy: The ipolicy to verify
1503   @type instances: object.Instance
1504   @param instances: List of instances to verify
1505   @return: A frozenset of instance names violating the ipolicy
1506
1507   """
1508   return frozenset([inst.name for inst in instances
1509                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1510
1511
1512 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1513   """Check that the brigdes needed by a list of nics exist.
1514
1515   """
1516   cluster = lu.cfg.GetClusterInfo()
1517   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1518   brlist = [params[constants.NIC_LINK] for params in paramslist
1519             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1520   if brlist:
1521     result = lu.rpc.call_bridges_exist(target_node, brlist)
1522     result.Raise("Error checking bridges on destination node '%s'" %
1523                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1524
1525
1526 def _CheckInstanceBridgesExist(lu, instance, node=None):
1527   """Check that the brigdes needed by an instance exist.
1528
1529   """
1530   if node is None:
1531     node = instance.primary_node
1532   _CheckNicsBridgesExist(lu, instance.nics, node)
1533
1534
1535 def _CheckOSVariant(os_obj, name):
1536   """Check whether an OS name conforms to the os variants specification.
1537
1538   @type os_obj: L{objects.OS}
1539   @param os_obj: OS object to check
1540   @type name: string
1541   @param name: OS name passed by the user, to check for validity
1542
1543   """
1544   variant = objects.OS.GetVariant(name)
1545   if not os_obj.supported_variants:
1546     if variant:
1547       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1548                                  " passed)" % (os_obj.name, variant),
1549                                  errors.ECODE_INVAL)
1550     return
1551   if not variant:
1552     raise errors.OpPrereqError("OS name must include a variant",
1553                                errors.ECODE_INVAL)
1554
1555   if variant not in os_obj.supported_variants:
1556     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1557
1558
1559 def _GetNodeInstancesInner(cfg, fn):
1560   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1561
1562
1563 def _GetNodeInstances(cfg, node_name):
1564   """Returns a list of all primary and secondary instances on a node.
1565
1566   """
1567
1568   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1569
1570
1571 def _GetNodePrimaryInstances(cfg, node_name):
1572   """Returns primary instances on a node.
1573
1574   """
1575   return _GetNodeInstancesInner(cfg,
1576                                 lambda inst: node_name == inst.primary_node)
1577
1578
1579 def _GetNodeSecondaryInstances(cfg, node_name):
1580   """Returns secondary instances on a node.
1581
1582   """
1583   return _GetNodeInstancesInner(cfg,
1584                                 lambda inst: node_name in inst.secondary_nodes)
1585
1586
1587 def _GetStorageTypeArgs(cfg, storage_type):
1588   """Returns the arguments for a storage type.
1589
1590   """
1591   # Special case for file storage
1592   if storage_type == constants.ST_FILE:
1593     # storage.FileStorage wants a list of storage directories
1594     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1595
1596   return []
1597
1598
1599 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1600   faulty = []
1601
1602   for dev in instance.disks:
1603     cfg.SetDiskID(dev, node_name)
1604
1605   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1606   result.Raise("Failed to get disk status from node %s" % node_name,
1607                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1608
1609   for idx, bdev_status in enumerate(result.payload):
1610     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1611       faulty.append(idx)
1612
1613   return faulty
1614
1615
1616 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1617   """Check the sanity of iallocator and node arguments and use the
1618   cluster-wide iallocator if appropriate.
1619
1620   Check that at most one of (iallocator, node) is specified. If none is
1621   specified, then the LU's opcode's iallocator slot is filled with the
1622   cluster-wide default iallocator.
1623
1624   @type iallocator_slot: string
1625   @param iallocator_slot: the name of the opcode iallocator slot
1626   @type node_slot: string
1627   @param node_slot: the name of the opcode target node slot
1628
1629   """
1630   node = getattr(lu.op, node_slot, None)
1631   iallocator = getattr(lu.op, iallocator_slot, None)
1632
1633   if node is not None and iallocator is not None:
1634     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1635                                errors.ECODE_INVAL)
1636   elif node is None and iallocator is None:
1637     default_iallocator = lu.cfg.GetDefaultIAllocator()
1638     if default_iallocator:
1639       setattr(lu.op, iallocator_slot, default_iallocator)
1640     else:
1641       raise errors.OpPrereqError("No iallocator or node given and no"
1642                                  " cluster-wide default iallocator found;"
1643                                  " please specify either an iallocator or a"
1644                                  " node, or set a cluster-wide default"
1645                                  " iallocator")
1646
1647
1648 def _GetDefaultIAllocator(cfg, iallocator):
1649   """Decides on which iallocator to use.
1650
1651   @type cfg: L{config.ConfigWriter}
1652   @param cfg: Cluster configuration object
1653   @type iallocator: string or None
1654   @param iallocator: Iallocator specified in opcode
1655   @rtype: string
1656   @return: Iallocator name
1657
1658   """
1659   if not iallocator:
1660     # Use default iallocator
1661     iallocator = cfg.GetDefaultIAllocator()
1662
1663   if not iallocator:
1664     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1665                                " opcode nor as a cluster-wide default",
1666                                errors.ECODE_INVAL)
1667
1668   return iallocator
1669
1670
1671 class LUClusterPostInit(LogicalUnit):
1672   """Logical unit for running hooks after cluster initialization.
1673
1674   """
1675   HPATH = "cluster-init"
1676   HTYPE = constants.HTYPE_CLUSTER
1677
1678   def BuildHooksEnv(self):
1679     """Build hooks env.
1680
1681     """
1682     return {
1683       "OP_TARGET": self.cfg.GetClusterName(),
1684       }
1685
1686   def BuildHooksNodes(self):
1687     """Build hooks nodes.
1688
1689     """
1690     return ([], [self.cfg.GetMasterNode()])
1691
1692   def Exec(self, feedback_fn):
1693     """Nothing to do.
1694
1695     """
1696     return True
1697
1698
1699 class LUClusterDestroy(LogicalUnit):
1700   """Logical unit for destroying the cluster.
1701
1702   """
1703   HPATH = "cluster-destroy"
1704   HTYPE = constants.HTYPE_CLUSTER
1705
1706   def BuildHooksEnv(self):
1707     """Build hooks env.
1708
1709     """
1710     return {
1711       "OP_TARGET": self.cfg.GetClusterName(),
1712       }
1713
1714   def BuildHooksNodes(self):
1715     """Build hooks nodes.
1716
1717     """
1718     return ([], [])
1719
1720   def CheckPrereq(self):
1721     """Check prerequisites.
1722
1723     This checks whether the cluster is empty.
1724
1725     Any errors are signaled by raising errors.OpPrereqError.
1726
1727     """
1728     master = self.cfg.GetMasterNode()
1729
1730     nodelist = self.cfg.GetNodeList()
1731     if len(nodelist) != 1 or nodelist[0] != master:
1732       raise errors.OpPrereqError("There are still %d node(s) in"
1733                                  " this cluster." % (len(nodelist) - 1),
1734                                  errors.ECODE_INVAL)
1735     instancelist = self.cfg.GetInstanceList()
1736     if instancelist:
1737       raise errors.OpPrereqError("There are still %d instance(s) in"
1738                                  " this cluster." % len(instancelist),
1739                                  errors.ECODE_INVAL)
1740
1741   def Exec(self, feedback_fn):
1742     """Destroys the cluster.
1743
1744     """
1745     master_params = self.cfg.GetMasterNetworkParameters()
1746
1747     # Run post hooks on master node before it's removed
1748     _RunPostHook(self, master_params.name)
1749
1750     ems = self.cfg.GetUseExternalMipScript()
1751     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1752                                                      master_params, ems)
1753     if result.fail_msg:
1754       self.LogWarning("Error disabling the master IP address: %s",
1755                       result.fail_msg)
1756
1757     return master_params.name
1758
1759
1760 def _VerifyCertificate(filename):
1761   """Verifies a certificate for L{LUClusterVerifyConfig}.
1762
1763   @type filename: string
1764   @param filename: Path to PEM file
1765
1766   """
1767   try:
1768     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1769                                            utils.ReadFile(filename))
1770   except Exception, err: # pylint: disable=W0703
1771     return (LUClusterVerifyConfig.ETYPE_ERROR,
1772             "Failed to load X509 certificate %s: %s" % (filename, err))
1773
1774   (errcode, msg) = \
1775     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1776                                 constants.SSL_CERT_EXPIRATION_ERROR)
1777
1778   if msg:
1779     fnamemsg = "While verifying %s: %s" % (filename, msg)
1780   else:
1781     fnamemsg = None
1782
1783   if errcode is None:
1784     return (None, fnamemsg)
1785   elif errcode == utils.CERT_WARNING:
1786     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1787   elif errcode == utils.CERT_ERROR:
1788     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1789
1790   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1791
1792
1793 def _GetAllHypervisorParameters(cluster, instances):
1794   """Compute the set of all hypervisor parameters.
1795
1796   @type cluster: L{objects.Cluster}
1797   @param cluster: the cluster object
1798   @param instances: list of L{objects.Instance}
1799   @param instances: additional instances from which to obtain parameters
1800   @rtype: list of (origin, hypervisor, parameters)
1801   @return: a list with all parameters found, indicating the hypervisor they
1802        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1803
1804   """
1805   hvp_data = []
1806
1807   for hv_name in cluster.enabled_hypervisors:
1808     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1809
1810   for os_name, os_hvp in cluster.os_hvp.items():
1811     for hv_name, hv_params in os_hvp.items():
1812       if hv_params:
1813         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1814         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1815
1816   # TODO: collapse identical parameter values in a single one
1817   for instance in instances:
1818     if instance.hvparams:
1819       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1820                        cluster.FillHV(instance)))
1821
1822   return hvp_data
1823
1824
1825 class _VerifyErrors(object):
1826   """Mix-in for cluster/group verify LUs.
1827
1828   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1829   self.op and self._feedback_fn to be available.)
1830
1831   """
1832
1833   ETYPE_FIELD = "code"
1834   ETYPE_ERROR = "ERROR"
1835   ETYPE_WARNING = "WARNING"
1836
1837   def _Error(self, ecode, item, msg, *args, **kwargs):
1838     """Format an error message.
1839
1840     Based on the opcode's error_codes parameter, either format a
1841     parseable error code, or a simpler error string.
1842
1843     This must be called only from Exec and functions called from Exec.
1844
1845     """
1846     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1847     itype, etxt, _ = ecode
1848     # first complete the msg
1849     if args:
1850       msg = msg % args
1851     # then format the whole message
1852     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1853       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1854     else:
1855       if item:
1856         item = " " + item
1857       else:
1858         item = ""
1859       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1860     # and finally report it via the feedback_fn
1861     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1862
1863   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1864     """Log an error message if the passed condition is True.
1865
1866     """
1867     cond = (bool(cond)
1868             or self.op.debug_simulate_errors) # pylint: disable=E1101
1869
1870     # If the error code is in the list of ignored errors, demote the error to a
1871     # warning
1872     (_, etxt, _) = ecode
1873     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1874       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1875
1876     if cond:
1877       self._Error(ecode, *args, **kwargs)
1878
1879     # do not mark the operation as failed for WARN cases only
1880     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1881       self.bad = self.bad or cond
1882
1883
1884 class LUClusterVerify(NoHooksLU):
1885   """Submits all jobs necessary to verify the cluster.
1886
1887   """
1888   REQ_BGL = False
1889
1890   def ExpandNames(self):
1891     self.needed_locks = {}
1892
1893   def Exec(self, feedback_fn):
1894     jobs = []
1895
1896     if self.op.group_name:
1897       groups = [self.op.group_name]
1898       depends_fn = lambda: None
1899     else:
1900       groups = self.cfg.GetNodeGroupList()
1901
1902       # Verify global configuration
1903       jobs.append([
1904         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1905         ])
1906
1907       # Always depend on global verification
1908       depends_fn = lambda: [(-len(jobs), [])]
1909
1910     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1911                                             ignore_errors=self.op.ignore_errors,
1912                                             depends=depends_fn())]
1913                 for group in groups)
1914
1915     # Fix up all parameters
1916     for op in itertools.chain(*jobs): # pylint: disable=W0142
1917       op.debug_simulate_errors = self.op.debug_simulate_errors
1918       op.verbose = self.op.verbose
1919       op.error_codes = self.op.error_codes
1920       try:
1921         op.skip_checks = self.op.skip_checks
1922       except AttributeError:
1923         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1924
1925     return ResultWithJobs(jobs)
1926
1927
1928 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1929   """Verifies the cluster config.
1930
1931   """
1932   REQ_BGL = False
1933
1934   def _VerifyHVP(self, hvp_data):
1935     """Verifies locally the syntax of the hypervisor parameters.
1936
1937     """
1938     for item, hv_name, hv_params in hvp_data:
1939       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1940              (item, hv_name))
1941       try:
1942         hv_class = hypervisor.GetHypervisor(hv_name)
1943         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1944         hv_class.CheckParameterSyntax(hv_params)
1945       except errors.GenericError, err:
1946         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1947
1948   def ExpandNames(self):
1949     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1950     self.share_locks = _ShareAll()
1951
1952   def CheckPrereq(self):
1953     """Check prerequisites.
1954
1955     """
1956     # Retrieve all information
1957     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1958     self.all_node_info = self.cfg.GetAllNodesInfo()
1959     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1960
1961   def Exec(self, feedback_fn):
1962     """Verify integrity of cluster, performing various test on nodes.
1963
1964     """
1965     self.bad = False
1966     self._feedback_fn = feedback_fn
1967
1968     feedback_fn("* Verifying cluster config")
1969
1970     for msg in self.cfg.VerifyConfig():
1971       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1972
1973     feedback_fn("* Verifying cluster certificate files")
1974
1975     for cert_filename in constants.ALL_CERT_FILES:
1976       (errcode, msg) = _VerifyCertificate(cert_filename)
1977       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1978
1979     feedback_fn("* Verifying hypervisor parameters")
1980
1981     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1982                                                 self.all_inst_info.values()))
1983
1984     feedback_fn("* Verifying all nodes belong to an existing group")
1985
1986     # We do this verification here because, should this bogus circumstance
1987     # occur, it would never be caught by VerifyGroup, which only acts on
1988     # nodes/instances reachable from existing node groups.
1989
1990     dangling_nodes = set(node.name for node in self.all_node_info.values()
1991                          if node.group not in self.all_group_info)
1992
1993     dangling_instances = {}
1994     no_node_instances = []
1995
1996     for inst in self.all_inst_info.values():
1997       if inst.primary_node in dangling_nodes:
1998         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1999       elif inst.primary_node not in self.all_node_info:
2000         no_node_instances.append(inst.name)
2001
2002     pretty_dangling = [
2003         "%s (%s)" %
2004         (node.name,
2005          utils.CommaJoin(dangling_instances.get(node.name,
2006                                                 ["no instances"])))
2007         for node in dangling_nodes]
2008
2009     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2010                   None,
2011                   "the following nodes (and their instances) belong to a non"
2012                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2013
2014     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2015                   None,
2016                   "the following instances have a non-existing primary-node:"
2017                   " %s", utils.CommaJoin(no_node_instances))
2018
2019     return not self.bad
2020
2021
2022 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2023   """Verifies the status of a node group.
2024
2025   """
2026   HPATH = "cluster-verify"
2027   HTYPE = constants.HTYPE_CLUSTER
2028   REQ_BGL = False
2029
2030   _HOOKS_INDENT_RE = re.compile("^", re.M)
2031
2032   class NodeImage(object):
2033     """A class representing the logical and physical status of a node.
2034
2035     @type name: string
2036     @ivar name: the node name to which this object refers
2037     @ivar volumes: a structure as returned from
2038         L{ganeti.backend.GetVolumeList} (runtime)
2039     @ivar instances: a list of running instances (runtime)
2040     @ivar pinst: list of configured primary instances (config)
2041     @ivar sinst: list of configured secondary instances (config)
2042     @ivar sbp: dictionary of {primary-node: list of instances} for all
2043         instances for which this node is secondary (config)
2044     @ivar mfree: free memory, as reported by hypervisor (runtime)
2045     @ivar dfree: free disk, as reported by the node (runtime)
2046     @ivar offline: the offline status (config)
2047     @type rpc_fail: boolean
2048     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2049         not whether the individual keys were correct) (runtime)
2050     @type lvm_fail: boolean
2051     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2052     @type hyp_fail: boolean
2053     @ivar hyp_fail: whether the RPC call didn't return the instance list
2054     @type ghost: boolean
2055     @ivar ghost: whether this is a known node or not (config)
2056     @type os_fail: boolean
2057     @ivar os_fail: whether the RPC call didn't return valid OS data
2058     @type oslist: list
2059     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2060     @type vm_capable: boolean
2061     @ivar vm_capable: whether the node can host instances
2062
2063     """
2064     def __init__(self, offline=False, name=None, vm_capable=True):
2065       self.name = name
2066       self.volumes = {}
2067       self.instances = []
2068       self.pinst = []
2069       self.sinst = []
2070       self.sbp = {}
2071       self.mfree = 0
2072       self.dfree = 0
2073       self.offline = offline
2074       self.vm_capable = vm_capable
2075       self.rpc_fail = False
2076       self.lvm_fail = False
2077       self.hyp_fail = False
2078       self.ghost = False
2079       self.os_fail = False
2080       self.oslist = {}
2081
2082   def ExpandNames(self):
2083     # This raises errors.OpPrereqError on its own:
2084     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2085
2086     # Get instances in node group; this is unsafe and needs verification later
2087     inst_names = \
2088       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2089
2090     self.needed_locks = {
2091       locking.LEVEL_INSTANCE: inst_names,
2092       locking.LEVEL_NODEGROUP: [self.group_uuid],
2093       locking.LEVEL_NODE: [],
2094       }
2095
2096     self.share_locks = _ShareAll()
2097
2098   def DeclareLocks(self, level):
2099     if level == locking.LEVEL_NODE:
2100       # Get members of node group; this is unsafe and needs verification later
2101       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2102
2103       all_inst_info = self.cfg.GetAllInstancesInfo()
2104
2105       # In Exec(), we warn about mirrored instances that have primary and
2106       # secondary living in separate node groups. To fully verify that
2107       # volumes for these instances are healthy, we will need to do an
2108       # extra call to their secondaries. We ensure here those nodes will
2109       # be locked.
2110       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2111         # Important: access only the instances whose lock is owned
2112         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2113           nodes.update(all_inst_info[inst].secondary_nodes)
2114
2115       self.needed_locks[locking.LEVEL_NODE] = nodes
2116
2117   def CheckPrereq(self):
2118     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2119     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2120
2121     group_nodes = set(self.group_info.members)
2122     group_instances = \
2123       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2124
2125     unlocked_nodes = \
2126         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2127
2128     unlocked_instances = \
2129         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2130
2131     if unlocked_nodes:
2132       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2133                                  utils.CommaJoin(unlocked_nodes),
2134                                  errors.ECODE_STATE)
2135
2136     if unlocked_instances:
2137       raise errors.OpPrereqError("Missing lock for instances: %s" %
2138                                  utils.CommaJoin(unlocked_instances),
2139                                  errors.ECODE_STATE)
2140
2141     self.all_node_info = self.cfg.GetAllNodesInfo()
2142     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2143
2144     self.my_node_names = utils.NiceSort(group_nodes)
2145     self.my_inst_names = utils.NiceSort(group_instances)
2146
2147     self.my_node_info = dict((name, self.all_node_info[name])
2148                              for name in self.my_node_names)
2149
2150     self.my_inst_info = dict((name, self.all_inst_info[name])
2151                              for name in self.my_inst_names)
2152
2153     # We detect here the nodes that will need the extra RPC calls for verifying
2154     # split LV volumes; they should be locked.
2155     extra_lv_nodes = set()
2156
2157     for inst in self.my_inst_info.values():
2158       if inst.disk_template in constants.DTS_INT_MIRROR:
2159         for nname in inst.all_nodes:
2160           if self.all_node_info[nname].group != self.group_uuid:
2161             extra_lv_nodes.add(nname)
2162
2163     unlocked_lv_nodes = \
2164         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2165
2166     if unlocked_lv_nodes:
2167       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2168                                  utils.CommaJoin(unlocked_lv_nodes),
2169                                  errors.ECODE_STATE)
2170     self.extra_lv_nodes = list(extra_lv_nodes)
2171
2172   def _VerifyNode(self, ninfo, nresult):
2173     """Perform some basic validation on data returned from a node.
2174
2175       - check the result data structure is well formed and has all the
2176         mandatory fields
2177       - check ganeti version
2178
2179     @type ninfo: L{objects.Node}
2180     @param ninfo: the node to check
2181     @param nresult: the results from the node
2182     @rtype: boolean
2183     @return: whether overall this call was successful (and we can expect
2184          reasonable values in the respose)
2185
2186     """
2187     node = ninfo.name
2188     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2189
2190     # main result, nresult should be a non-empty dict
2191     test = not nresult or not isinstance(nresult, dict)
2192     _ErrorIf(test, constants.CV_ENODERPC, node,
2193                   "unable to verify node: no data returned")
2194     if test:
2195       return False
2196
2197     # compares ganeti version
2198     local_version = constants.PROTOCOL_VERSION
2199     remote_version = nresult.get("version", None)
2200     test = not (remote_version and
2201                 isinstance(remote_version, (list, tuple)) and
2202                 len(remote_version) == 2)
2203     _ErrorIf(test, constants.CV_ENODERPC, node,
2204              "connection to node returned invalid data")
2205     if test:
2206       return False
2207
2208     test = local_version != remote_version[0]
2209     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2210              "incompatible protocol versions: master %s,"
2211              " node %s", local_version, remote_version[0])
2212     if test:
2213       return False
2214
2215     # node seems compatible, we can actually try to look into its results
2216
2217     # full package version
2218     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2219                   constants.CV_ENODEVERSION, node,
2220                   "software version mismatch: master %s, node %s",
2221                   constants.RELEASE_VERSION, remote_version[1],
2222                   code=self.ETYPE_WARNING)
2223
2224     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2225     if ninfo.vm_capable and isinstance(hyp_result, dict):
2226       for hv_name, hv_result in hyp_result.iteritems():
2227         test = hv_result is not None
2228         _ErrorIf(test, constants.CV_ENODEHV, node,
2229                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2230
2231     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2232     if ninfo.vm_capable and isinstance(hvp_result, list):
2233       for item, hv_name, hv_result in hvp_result:
2234         _ErrorIf(True, constants.CV_ENODEHV, node,
2235                  "hypervisor %s parameter verify failure (source %s): %s",
2236                  hv_name, item, hv_result)
2237
2238     test = nresult.get(constants.NV_NODESETUP,
2239                        ["Missing NODESETUP results"])
2240     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2241              "; ".join(test))
2242
2243     return True
2244
2245   def _VerifyNodeTime(self, ninfo, nresult,
2246                       nvinfo_starttime, nvinfo_endtime):
2247     """Check the node time.
2248
2249     @type ninfo: L{objects.Node}
2250     @param ninfo: the node to check
2251     @param nresult: the remote results for the node
2252     @param nvinfo_starttime: the start time of the RPC call
2253     @param nvinfo_endtime: the end time of the RPC call
2254
2255     """
2256     node = ninfo.name
2257     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2258
2259     ntime = nresult.get(constants.NV_TIME, None)
2260     try:
2261       ntime_merged = utils.MergeTime(ntime)
2262     except (ValueError, TypeError):
2263       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2264       return
2265
2266     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2267       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2268     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2269       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2270     else:
2271       ntime_diff = None
2272
2273     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2274              "Node time diverges by at least %s from master node time",
2275              ntime_diff)
2276
2277   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2278     """Check the node LVM results.
2279
2280     @type ninfo: L{objects.Node}
2281     @param ninfo: the node to check
2282     @param nresult: the remote results for the node
2283     @param vg_name: the configured VG name
2284
2285     """
2286     if vg_name is None:
2287       return
2288
2289     node = ninfo.name
2290     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291
2292     # checks vg existence and size > 20G
2293     vglist = nresult.get(constants.NV_VGLIST, None)
2294     test = not vglist
2295     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2296     if not test:
2297       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2298                                             constants.MIN_VG_SIZE)
2299       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2300
2301     # check pv names
2302     pvlist = nresult.get(constants.NV_PVLIST, None)
2303     test = pvlist is None
2304     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2305     if not test:
2306       # check that ':' is not present in PV names, since it's a
2307       # special character for lvcreate (denotes the range of PEs to
2308       # use on the PV)
2309       for _, pvname, owner_vg in pvlist:
2310         test = ":" in pvname
2311         _ErrorIf(test, constants.CV_ENODELVM, node,
2312                  "Invalid character ':' in PV '%s' of VG '%s'",
2313                  pvname, owner_vg)
2314
2315   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2316     """Check the node bridges.
2317
2318     @type ninfo: L{objects.Node}
2319     @param ninfo: the node to check
2320     @param nresult: the remote results for the node
2321     @param bridges: the expected list of bridges
2322
2323     """
2324     if not bridges:
2325       return
2326
2327     node = ninfo.name
2328     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2329
2330     missing = nresult.get(constants.NV_BRIDGES, None)
2331     test = not isinstance(missing, list)
2332     _ErrorIf(test, constants.CV_ENODENET, node,
2333              "did not return valid bridge information")
2334     if not test:
2335       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2336                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2337
2338   def _VerifyNodeUserScripts(self, ninfo, nresult):
2339     """Check the results of user scripts presence and executability on the node
2340
2341     @type ninfo: L{objects.Node}
2342     @param ninfo: the node to check
2343     @param nresult: the remote results for the node
2344
2345     """
2346     node = ninfo.name
2347
2348     test = not constants.NV_USERSCRIPTS in nresult
2349     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2350                   "did not return user scripts information")
2351
2352     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2353     if not test:
2354       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2355                     "user scripts not present or not executable: %s" %
2356                     utils.CommaJoin(sorted(broken_scripts)))
2357
2358   def _VerifyNodeNetwork(self, ninfo, nresult):
2359     """Check the node network connectivity results.
2360
2361     @type ninfo: L{objects.Node}
2362     @param ninfo: the node to check
2363     @param nresult: the remote results for the node
2364
2365     """
2366     node = ninfo.name
2367     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2368
2369     test = constants.NV_NODELIST not in nresult
2370     _ErrorIf(test, constants.CV_ENODESSH, node,
2371              "node hasn't returned node ssh connectivity data")
2372     if not test:
2373       if nresult[constants.NV_NODELIST]:
2374         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2375           _ErrorIf(True, constants.CV_ENODESSH, node,
2376                    "ssh communication with node '%s': %s", a_node, a_msg)
2377
2378     test = constants.NV_NODENETTEST not in nresult
2379     _ErrorIf(test, constants.CV_ENODENET, node,
2380              "node hasn't returned node tcp connectivity data")
2381     if not test:
2382       if nresult[constants.NV_NODENETTEST]:
2383         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2384         for anode in nlist:
2385           _ErrorIf(True, constants.CV_ENODENET, node,
2386                    "tcp communication with node '%s': %s",
2387                    anode, nresult[constants.NV_NODENETTEST][anode])
2388
2389     test = constants.NV_MASTERIP not in nresult
2390     _ErrorIf(test, constants.CV_ENODENET, node,
2391              "node hasn't returned node master IP reachability data")
2392     if not test:
2393       if not nresult[constants.NV_MASTERIP]:
2394         if node == self.master_node:
2395           msg = "the master node cannot reach the master IP (not configured?)"
2396         else:
2397           msg = "cannot reach the master IP"
2398         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2399
2400   def _VerifyInstance(self, instance, instanceconfig, node_image,
2401                       diskstatus):
2402     """Verify an instance.
2403
2404     This function checks to see if the required block devices are
2405     available on the instance's node.
2406
2407     """
2408     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2409     node_current = instanceconfig.primary_node
2410
2411     node_vol_should = {}
2412     instanceconfig.MapLVsByNode(node_vol_should)
2413
2414     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2415     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2416     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2417
2418     for node in node_vol_should:
2419       n_img = node_image[node]
2420       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2421         # ignore missing volumes on offline or broken nodes
2422         continue
2423       for volume in node_vol_should[node]:
2424         test = volume not in n_img.volumes
2425         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2426                  "volume %s missing on node %s", volume, node)
2427
2428     if instanceconfig.admin_state == constants.ADMINST_UP:
2429       pri_img = node_image[node_current]
2430       test = instance not in pri_img.instances and not pri_img.offline
2431       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2432                "instance not running on its primary node %s",
2433                node_current)
2434
2435     diskdata = [(nname, success, status, idx)
2436                 for (nname, disks) in diskstatus.items()
2437                 for idx, (success, status) in enumerate(disks)]
2438
2439     for nname, success, bdev_status, idx in diskdata:
2440       # the 'ghost node' construction in Exec() ensures that we have a
2441       # node here
2442       snode = node_image[nname]
2443       bad_snode = snode.ghost or snode.offline
2444       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2445                not success and not bad_snode,
2446                constants.CV_EINSTANCEFAULTYDISK, instance,
2447                "couldn't retrieve status for disk/%s on %s: %s",
2448                idx, nname, bdev_status)
2449       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2450                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2451                constants.CV_EINSTANCEFAULTYDISK, instance,
2452                "disk/%s on %s is faulty", idx, nname)
2453
2454   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2455     """Verify if there are any unknown volumes in the cluster.
2456
2457     The .os, .swap and backup volumes are ignored. All other volumes are
2458     reported as unknown.
2459
2460     @type reserved: L{ganeti.utils.FieldSet}
2461     @param reserved: a FieldSet of reserved volume names
2462
2463     """
2464     for node, n_img in node_image.items():
2465       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2466           self.all_node_info[node].group != self.group_uuid):
2467         # skip non-healthy nodes
2468         continue
2469       for volume in n_img.volumes:
2470         test = ((node not in node_vol_should or
2471                 volume not in node_vol_should[node]) and
2472                 not reserved.Matches(volume))
2473         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2474                       "volume %s is unknown", volume)
2475
2476   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2477     """Verify N+1 Memory Resilience.
2478
2479     Check that if one single node dies we can still start all the
2480     instances it was primary for.
2481
2482     """
2483     cluster_info = self.cfg.GetClusterInfo()
2484     for node, n_img in node_image.items():
2485       # This code checks that every node which is now listed as
2486       # secondary has enough memory to host all instances it is
2487       # supposed to should a single other node in the cluster fail.
2488       # FIXME: not ready for failover to an arbitrary node
2489       # FIXME: does not support file-backed instances
2490       # WARNING: we currently take into account down instances as well
2491       # as up ones, considering that even if they're down someone
2492       # might want to start them even in the event of a node failure.
2493       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2494         # we're skipping nodes marked offline and nodes in other groups from
2495         # the N+1 warning, since most likely we don't have good memory
2496         # infromation from them; we already list instances living on such
2497         # nodes, and that's enough warning
2498         continue
2499       #TODO(dynmem): also consider ballooning out other instances
2500       for prinode, instances in n_img.sbp.items():
2501         needed_mem = 0
2502         for instance in instances:
2503           bep = cluster_info.FillBE(instance_cfg[instance])
2504           if bep[constants.BE_AUTO_BALANCE]:
2505             needed_mem += bep[constants.BE_MINMEM]
2506         test = n_img.mfree < needed_mem
2507         self._ErrorIf(test, constants.CV_ENODEN1, node,
2508                       "not enough memory to accomodate instance failovers"
2509                       " should node %s fail (%dMiB needed, %dMiB available)",
2510                       prinode, needed_mem, n_img.mfree)
2511
2512   @classmethod
2513   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2514                    (files_all, files_opt, files_mc, files_vm)):
2515     """Verifies file checksums collected from all nodes.
2516
2517     @param errorif: Callback for reporting errors
2518     @param nodeinfo: List of L{objects.Node} objects
2519     @param master_node: Name of master node
2520     @param all_nvinfo: RPC results
2521
2522     """
2523     # Define functions determining which nodes to consider for a file
2524     files2nodefn = [
2525       (files_all, None),
2526       (files_mc, lambda node: (node.master_candidate or
2527                                node.name == master_node)),
2528       (files_vm, lambda node: node.vm_capable),
2529       ]
2530
2531     # Build mapping from filename to list of nodes which should have the file
2532     nodefiles = {}
2533     for (files, fn) in files2nodefn:
2534       if fn is None:
2535         filenodes = nodeinfo
2536       else:
2537         filenodes = filter(fn, nodeinfo)
2538       nodefiles.update((filename,
2539                         frozenset(map(operator.attrgetter("name"), filenodes)))
2540                        for filename in files)
2541
2542     assert set(nodefiles) == (files_all | files_mc | files_vm)
2543
2544     fileinfo = dict((filename, {}) for filename in nodefiles)
2545     ignore_nodes = set()
2546
2547     for node in nodeinfo:
2548       if node.offline:
2549         ignore_nodes.add(node.name)
2550         continue
2551
2552       nresult = all_nvinfo[node.name]
2553
2554       if nresult.fail_msg or not nresult.payload:
2555         node_files = None
2556       else:
2557         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2558
2559       test = not (node_files and isinstance(node_files, dict))
2560       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2561               "Node did not return file checksum data")
2562       if test:
2563         ignore_nodes.add(node.name)
2564         continue
2565
2566       # Build per-checksum mapping from filename to nodes having it
2567       for (filename, checksum) in node_files.items():
2568         assert filename in nodefiles
2569         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2570
2571     for (filename, checksums) in fileinfo.items():
2572       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2573
2574       # Nodes having the file
2575       with_file = frozenset(node_name
2576                             for nodes in fileinfo[filename].values()
2577                             for node_name in nodes) - ignore_nodes
2578
2579       expected_nodes = nodefiles[filename] - ignore_nodes
2580
2581       # Nodes missing file
2582       missing_file = expected_nodes - with_file
2583
2584       if filename in files_opt:
2585         # All or no nodes
2586         errorif(missing_file and missing_file != expected_nodes,
2587                 constants.CV_ECLUSTERFILECHECK, None,
2588                 "File %s is optional, but it must exist on all or no"
2589                 " nodes (not found on %s)",
2590                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2591       else:
2592         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2593                 "File %s is missing from node(s) %s", filename,
2594                 utils.CommaJoin(utils.NiceSort(missing_file)))
2595
2596         # Warn if a node has a file it shouldn't
2597         unexpected = with_file - expected_nodes
2598         errorif(unexpected,
2599                 constants.CV_ECLUSTERFILECHECK, None,
2600                 "File %s should not exist on node(s) %s",
2601                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2602
2603       # See if there are multiple versions of the file
2604       test = len(checksums) > 1
2605       if test:
2606         variants = ["variant %s on %s" %
2607                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2608                     for (idx, (checksum, nodes)) in
2609                       enumerate(sorted(checksums.items()))]
2610       else:
2611         variants = []
2612
2613       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2614               "File %s found with %s different checksums (%s)",
2615               filename, len(checksums), "; ".join(variants))
2616
2617   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2618                       drbd_map):
2619     """Verifies and the node DRBD status.
2620
2621     @type ninfo: L{objects.Node}
2622     @param ninfo: the node to check
2623     @param nresult: the remote results for the node
2624     @param instanceinfo: the dict of instances
2625     @param drbd_helper: the configured DRBD usermode helper
2626     @param drbd_map: the DRBD map as returned by
2627         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2628
2629     """
2630     node = ninfo.name
2631     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2632
2633     if drbd_helper:
2634       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2635       test = (helper_result == None)
2636       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2637                "no drbd usermode helper returned")
2638       if helper_result:
2639         status, payload = helper_result
2640         test = not status
2641         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2642                  "drbd usermode helper check unsuccessful: %s", payload)
2643         test = status and (payload != drbd_helper)
2644         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2645                  "wrong drbd usermode helper: %s", payload)
2646
2647     # compute the DRBD minors
2648     node_drbd = {}
2649     for minor, instance in drbd_map[node].items():
2650       test = instance not in instanceinfo
2651       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2652                "ghost instance '%s' in temporary DRBD map", instance)
2653         # ghost instance should not be running, but otherwise we
2654         # don't give double warnings (both ghost instance and
2655         # unallocated minor in use)
2656       if test:
2657         node_drbd[minor] = (instance, False)
2658       else:
2659         instance = instanceinfo[instance]
2660         node_drbd[minor] = (instance.name,
2661                             instance.admin_state == constants.ADMINST_UP)
2662
2663     # and now check them
2664     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2665     test = not isinstance(used_minors, (tuple, list))
2666     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2667              "cannot parse drbd status file: %s", str(used_minors))
2668     if test:
2669       # we cannot check drbd status
2670       return
2671
2672     for minor, (iname, must_exist) in node_drbd.items():
2673       test = minor not in used_minors and must_exist
2674       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2675                "drbd minor %d of instance %s is not active", minor, iname)
2676     for minor in used_minors:
2677       test = minor not in node_drbd
2678       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2679                "unallocated drbd minor %d is in use", minor)
2680
2681   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2682     """Builds the node OS structures.
2683
2684     @type ninfo: L{objects.Node}
2685     @param ninfo: the node to check
2686     @param nresult: the remote results for the node
2687     @param nimg: the node image object
2688
2689     """
2690     node = ninfo.name
2691     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2692
2693     remote_os = nresult.get(constants.NV_OSLIST, None)
2694     test = (not isinstance(remote_os, list) or
2695             not compat.all(isinstance(v, list) and len(v) == 7
2696                            for v in remote_os))
2697
2698     _ErrorIf(test, constants.CV_ENODEOS, node,
2699              "node hasn't returned valid OS data")
2700
2701     nimg.os_fail = test
2702
2703     if test:
2704       return
2705
2706     os_dict = {}
2707
2708     for (name, os_path, status, diagnose,
2709          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2710
2711       if name not in os_dict:
2712         os_dict[name] = []
2713
2714       # parameters is a list of lists instead of list of tuples due to
2715       # JSON lacking a real tuple type, fix it:
2716       parameters = [tuple(v) for v in parameters]
2717       os_dict[name].append((os_path, status, diagnose,
2718                             set(variants), set(parameters), set(api_ver)))
2719
2720     nimg.oslist = os_dict
2721
2722   def _VerifyNodeOS(self, ninfo, nimg, base):
2723     """Verifies the node OS list.
2724
2725     @type ninfo: L{objects.Node}
2726     @param ninfo: the node to check
2727     @param nimg: the node image object
2728     @param base: the 'template' node we match against (e.g. from the master)
2729
2730     """
2731     node = ninfo.name
2732     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2733
2734     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2735
2736     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2737     for os_name, os_data in nimg.oslist.items():
2738       assert os_data, "Empty OS status for OS %s?!" % os_name
2739       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2740       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2741                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2742       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2743                "OS '%s' has multiple entries (first one shadows the rest): %s",
2744                os_name, utils.CommaJoin([v[0] for v in os_data]))
2745       # comparisons with the 'base' image
2746       test = os_name not in base.oslist
2747       _ErrorIf(test, constants.CV_ENODEOS, node,
2748                "Extra OS %s not present on reference node (%s)",
2749                os_name, base.name)
2750       if test:
2751         continue
2752       assert base.oslist[os_name], "Base node has empty OS status?"
2753       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2754       if not b_status:
2755         # base OS is invalid, skipping
2756         continue
2757       for kind, a, b in [("API version", f_api, b_api),
2758                          ("variants list", f_var, b_var),
2759                          ("parameters", beautify_params(f_param),
2760                           beautify_params(b_param))]:
2761         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2762                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2763                  kind, os_name, base.name,
2764                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2765
2766     # check any missing OSes
2767     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2768     _ErrorIf(missing, constants.CV_ENODEOS, node,
2769              "OSes present on reference node %s but missing on this node: %s",
2770              base.name, utils.CommaJoin(missing))
2771
2772   def _VerifyOob(self, ninfo, nresult):
2773     """Verifies out of band functionality of a node.
2774
2775     @type ninfo: L{objects.Node}
2776     @param ninfo: the node to check
2777     @param nresult: the remote results for the node
2778
2779     """
2780     node = ninfo.name
2781     # We just have to verify the paths on master and/or master candidates
2782     # as the oob helper is invoked on the master
2783     if ((ninfo.master_candidate or ninfo.master_capable) and
2784         constants.NV_OOB_PATHS in nresult):
2785       for path_result in nresult[constants.NV_OOB_PATHS]:
2786         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2787
2788   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2789     """Verifies and updates the node volume data.
2790
2791     This function will update a L{NodeImage}'s internal structures
2792     with data from the remote call.
2793
2794     @type ninfo: L{objects.Node}
2795     @param ninfo: the node to check
2796     @param nresult: the remote results for the node
2797     @param nimg: the node image object
2798     @param vg_name: the configured VG name
2799
2800     """
2801     node = ninfo.name
2802     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2803
2804     nimg.lvm_fail = True
2805     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2806     if vg_name is None:
2807       pass
2808     elif isinstance(lvdata, basestring):
2809       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2810                utils.SafeEncode(lvdata))
2811     elif not isinstance(lvdata, dict):
2812       _ErrorIf(True, constants.CV_ENODELVM, node,
2813                "rpc call to node failed (lvlist)")
2814     else:
2815       nimg.volumes = lvdata
2816       nimg.lvm_fail = False
2817
2818   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2819     """Verifies and updates the node instance list.
2820
2821     If the listing was successful, then updates this node's instance
2822     list. Otherwise, it marks the RPC call as failed for the instance
2823     list key.
2824
2825     @type ninfo: L{objects.Node}
2826     @param ninfo: the node to check
2827     @param nresult: the remote results for the node
2828     @param nimg: the node image object
2829
2830     """
2831     idata = nresult.get(constants.NV_INSTANCELIST, None)
2832     test = not isinstance(idata, list)
2833     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2834                   "rpc call to node failed (instancelist): %s",
2835                   utils.SafeEncode(str(idata)))
2836     if test:
2837       nimg.hyp_fail = True
2838     else:
2839       nimg.instances = idata
2840
2841   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2842     """Verifies and computes a node information map
2843
2844     @type ninfo: L{objects.Node}
2845     @param ninfo: the node to check
2846     @param nresult: the remote results for the node
2847     @param nimg: the node image object
2848     @param vg_name: the configured VG name
2849
2850     """
2851     node = ninfo.name
2852     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2853
2854     # try to read free memory (from the hypervisor)
2855     hv_info = nresult.get(constants.NV_HVINFO, None)
2856     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2857     _ErrorIf(test, constants.CV_ENODEHV, node,
2858              "rpc call to node failed (hvinfo)")
2859     if not test:
2860       try:
2861         nimg.mfree = int(hv_info["memory_free"])
2862       except (ValueError, TypeError):
2863         _ErrorIf(True, constants.CV_ENODERPC, node,
2864                  "node returned invalid nodeinfo, check hypervisor")
2865
2866     # FIXME: devise a free space model for file based instances as well
2867     if vg_name is not None:
2868       test = (constants.NV_VGLIST not in nresult or
2869               vg_name not in nresult[constants.NV_VGLIST])
2870       _ErrorIf(test, constants.CV_ENODELVM, node,
2871                "node didn't return data for the volume group '%s'"
2872                " - it is either missing or broken", vg_name)
2873       if not test:
2874         try:
2875           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2876         except (ValueError, TypeError):
2877           _ErrorIf(True, constants.CV_ENODERPC, node,
2878                    "node returned invalid LVM info, check LVM status")
2879
2880   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2881     """Gets per-disk status information for all instances.
2882
2883     @type nodelist: list of strings
2884     @param nodelist: Node names
2885     @type node_image: dict of (name, L{objects.Node})
2886     @param node_image: Node objects
2887     @type instanceinfo: dict of (name, L{objects.Instance})
2888     @param instanceinfo: Instance objects
2889     @rtype: {instance: {node: [(succes, payload)]}}
2890     @return: a dictionary of per-instance dictionaries with nodes as
2891         keys and disk information as values; the disk information is a
2892         list of tuples (success, payload)
2893
2894     """
2895     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2896
2897     node_disks = {}
2898     node_disks_devonly = {}
2899     diskless_instances = set()
2900     diskless = constants.DT_DISKLESS
2901
2902     for nname in nodelist:
2903       node_instances = list(itertools.chain(node_image[nname].pinst,
2904                                             node_image[nname].sinst))
2905       diskless_instances.update(inst for inst in node_instances
2906                                 if instanceinfo[inst].disk_template == diskless)
2907       disks = [(inst, disk)
2908                for inst in node_instances
2909                for disk in instanceinfo[inst].disks]
2910
2911       if not disks:
2912         # No need to collect data
2913         continue
2914
2915       node_disks[nname] = disks
2916
2917       # Creating copies as SetDiskID below will modify the objects and that can
2918       # lead to incorrect data returned from nodes
2919       devonly = [dev.Copy() for (_, dev) in disks]
2920
2921       for dev in devonly:
2922         self.cfg.SetDiskID(dev, nname)
2923
2924       node_disks_devonly[nname] = devonly
2925
2926     assert len(node_disks) == len(node_disks_devonly)
2927
2928     # Collect data from all nodes with disks
2929     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2930                                                           node_disks_devonly)
2931
2932     assert len(result) == len(node_disks)
2933
2934     instdisk = {}
2935
2936     for (nname, nres) in result.items():
2937       disks = node_disks[nname]
2938
2939       if nres.offline:
2940         # No data from this node
2941         data = len(disks) * [(False, "node offline")]
2942       else:
2943         msg = nres.fail_msg
2944         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2945                  "while getting disk information: %s", msg)
2946         if msg:
2947           # No data from this node
2948           data = len(disks) * [(False, msg)]
2949         else:
2950           data = []
2951           for idx, i in enumerate(nres.payload):
2952             if isinstance(i, (tuple, list)) and len(i) == 2:
2953               data.append(i)
2954             else:
2955               logging.warning("Invalid result from node %s, entry %d: %s",
2956                               nname, idx, i)
2957               data.append((False, "Invalid result from the remote node"))
2958
2959       for ((inst, _), status) in zip(disks, data):
2960         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2961
2962     # Add empty entries for diskless instances.
2963     for inst in diskless_instances:
2964       assert inst not in instdisk
2965       instdisk[inst] = {}
2966
2967     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2968                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2969                       compat.all(isinstance(s, (tuple, list)) and
2970                                  len(s) == 2 for s in statuses)
2971                       for inst, nnames in instdisk.items()
2972                       for nname, statuses in nnames.items())
2973     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2974
2975     return instdisk
2976
2977   @staticmethod
2978   def _SshNodeSelector(group_uuid, all_nodes):
2979     """Create endless iterators for all potential SSH check hosts.
2980
2981     """
2982     nodes = [node for node in all_nodes
2983              if (node.group != group_uuid and
2984                  not node.offline)]
2985     keyfunc = operator.attrgetter("group")
2986
2987     return map(itertools.cycle,
2988                [sorted(map(operator.attrgetter("name"), names))
2989                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2990                                                   keyfunc)])
2991
2992   @classmethod
2993   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2994     """Choose which nodes should talk to which other nodes.
2995
2996     We will make nodes contact all nodes in their group, and one node from
2997     every other group.
2998
2999     @warning: This algorithm has a known issue if one node group is much
3000       smaller than others (e.g. just one node). In such a case all other
3001       nodes will talk to the single node.
3002
3003     """
3004     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3005     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3006
3007     return (online_nodes,
3008             dict((name, sorted([i.next() for i in sel]))
3009                  for name in online_nodes))
3010
3011   def BuildHooksEnv(self):
3012     """Build hooks env.
3013
3014     Cluster-Verify hooks just ran in the post phase and their failure makes
3015     the output be logged in the verify output and the verification to fail.
3016
3017     """
3018     env = {
3019       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3020       }
3021
3022     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3023                for node in self.my_node_info.values())
3024
3025     return env
3026
3027   def BuildHooksNodes(self):
3028     """Build hooks nodes.
3029
3030     """
3031     return ([], self.my_node_names)
3032
3033   def Exec(self, feedback_fn):
3034     """Verify integrity of the node group, performing various test on nodes.
3035
3036     """
3037     # This method has too many local variables. pylint: disable=R0914
3038     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3039
3040     if not self.my_node_names:
3041       # empty node group
3042       feedback_fn("* Empty node group, skipping verification")
3043       return True
3044
3045     self.bad = False
3046     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3047     verbose = self.op.verbose
3048     self._feedback_fn = feedback_fn
3049
3050     vg_name = self.cfg.GetVGName()
3051     drbd_helper = self.cfg.GetDRBDHelper()
3052     cluster = self.cfg.GetClusterInfo()
3053     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3054     hypervisors = cluster.enabled_hypervisors
3055     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3056
3057     i_non_redundant = [] # Non redundant instances
3058     i_non_a_balanced = [] # Non auto-balanced instances
3059     i_offline = 0 # Count of offline instances
3060     n_offline = 0 # Count of offline nodes
3061     n_drained = 0 # Count of nodes being drained
3062     node_vol_should = {}
3063
3064     # FIXME: verify OS list
3065
3066     # File verification
3067     filemap = _ComputeAncillaryFiles(cluster, False)
3068
3069     # do local checksums
3070     master_node = self.master_node = self.cfg.GetMasterNode()
3071     master_ip = self.cfg.GetMasterIP()
3072
3073     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3074
3075     user_scripts = []
3076     if self.cfg.GetUseExternalMipScript():
3077       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3078
3079     node_verify_param = {
3080       constants.NV_FILELIST:
3081         utils.UniqueSequence(filename
3082                              for files in filemap
3083                              for filename in files),
3084       constants.NV_NODELIST:
3085         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3086                                   self.all_node_info.values()),
3087       constants.NV_HYPERVISOR: hypervisors,
3088       constants.NV_HVPARAMS:
3089         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3090       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3091                                  for node in node_data_list
3092                                  if not node.offline],
3093       constants.NV_INSTANCELIST: hypervisors,
3094       constants.NV_VERSION: None,
3095       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3096       constants.NV_NODESETUP: None,
3097       constants.NV_TIME: None,
3098       constants.NV_MASTERIP: (master_node, master_ip),
3099       constants.NV_OSLIST: None,
3100       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3101       constants.NV_USERSCRIPTS: user_scripts,
3102       }
3103
3104     if vg_name is not None:
3105       node_verify_param[constants.NV_VGLIST] = None
3106       node_verify_param[constants.NV_LVLIST] = vg_name
3107       node_verify_param[constants.NV_PVLIST] = [vg_name]
3108       node_verify_param[constants.NV_DRBDLIST] = None
3109
3110     if drbd_helper:
3111       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3112
3113     # bridge checks
3114     # FIXME: this needs to be changed per node-group, not cluster-wide
3115     bridges = set()
3116     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3117     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3118       bridges.add(default_nicpp[constants.NIC_LINK])
3119     for instance in self.my_inst_info.values():
3120       for nic in instance.nics:
3121         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3122         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3123           bridges.add(full_nic[constants.NIC_LINK])
3124
3125     if bridges:
3126       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3127
3128     # Build our expected cluster state
3129     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3130                                                  name=node.name,
3131                                                  vm_capable=node.vm_capable))
3132                       for node in node_data_list)
3133
3134     # Gather OOB paths
3135     oob_paths = []
3136     for node in self.all_node_info.values():
3137       path = _SupportsOob(self.cfg, node)
3138       if path and path not in oob_paths:
3139         oob_paths.append(path)
3140
3141     if oob_paths:
3142       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3143
3144     for instance in self.my_inst_names:
3145       inst_config = self.my_inst_info[instance]
3146
3147       for nname in inst_config.all_nodes:
3148         if nname not in node_image:
3149           gnode = self.NodeImage(name=nname)
3150           gnode.ghost = (nname not in self.all_node_info)
3151           node_image[nname] = gnode
3152
3153       inst_config.MapLVsByNode(node_vol_should)
3154
3155       pnode = inst_config.primary_node
3156       node_image[pnode].pinst.append(instance)
3157
3158       for snode in inst_config.secondary_nodes:
3159         nimg = node_image[snode]
3160         nimg.sinst.append(instance)
3161         if pnode not in nimg.sbp:
3162           nimg.sbp[pnode] = []
3163         nimg.sbp[pnode].append(instance)
3164
3165     # At this point, we have the in-memory data structures complete,
3166     # except for the runtime information, which we'll gather next
3167
3168     # Due to the way our RPC system works, exact response times cannot be
3169     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3170     # time before and after executing the request, we can at least have a time
3171     # window.
3172     nvinfo_starttime = time.time()
3173     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3174                                            node_verify_param,
3175                                            self.cfg.GetClusterName())
3176     nvinfo_endtime = time.time()
3177
3178     if self.extra_lv_nodes and vg_name is not None:
3179       extra_lv_nvinfo = \
3180           self.rpc.call_node_verify(self.extra_lv_nodes,
3181                                     {constants.NV_LVLIST: vg_name},
3182                                     self.cfg.GetClusterName())
3183     else:
3184       extra_lv_nvinfo = {}
3185
3186     all_drbd_map = self.cfg.ComputeDRBDMap()
3187
3188     feedback_fn("* Gathering disk information (%s nodes)" %
3189                 len(self.my_node_names))
3190     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3191                                      self.my_inst_info)
3192
3193     feedback_fn("* Verifying configuration file consistency")
3194
3195     # If not all nodes are being checked, we need to make sure the master node
3196     # and a non-checked vm_capable node are in the list.
3197     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3198     if absent_nodes:
3199       vf_nvinfo = all_nvinfo.copy()
3200       vf_node_info = list(self.my_node_info.values())
3201       additional_nodes = []
3202       if master_node not in self.my_node_info:
3203         additional_nodes.append(master_node)
3204         vf_node_info.append(self.all_node_info[master_node])
3205       # Add the first vm_capable node we find which is not included
3206       for node in absent_nodes:
3207         nodeinfo = self.all_node_info[node]
3208         if nodeinfo.vm_capable and not nodeinfo.offline:
3209           additional_nodes.append(node)
3210           vf_node_info.append(self.all_node_info[node])
3211           break
3212       key = constants.NV_FILELIST
3213       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3214                                                  {key: node_verify_param[key]},
3215                                                  self.cfg.GetClusterName()))
3216     else:
3217       vf_nvinfo = all_nvinfo
3218       vf_node_info = self.my_node_info.values()
3219
3220     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3221
3222     feedback_fn("* Verifying node status")
3223
3224     refos_img = None
3225
3226     for node_i in node_data_list:
3227       node = node_i.name
3228       nimg = node_image[node]
3229
3230       if node_i.offline:
3231         if verbose:
3232           feedback_fn("* Skipping offline node %s" % (node,))
3233         n_offline += 1
3234         continue
3235
3236       if node == master_node:
3237         ntype = "master"
3238       elif node_i.master_candidate:
3239         ntype = "master candidate"
3240       elif node_i.drained:
3241         ntype = "drained"
3242         n_drained += 1
3243       else:
3244         ntype = "regular"
3245       if verbose:
3246         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3247
3248       msg = all_nvinfo[node].fail_msg
3249       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250                msg)
3251       if msg:
3252         nimg.rpc_fail = True
3253         continue
3254
3255       nresult = all_nvinfo[node].payload
3256
3257       nimg.call_ok = self._VerifyNode(node_i, nresult)
3258       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3259       self._VerifyNodeNetwork(node_i, nresult)
3260       self._VerifyNodeUserScripts(node_i, nresult)
3261       self._VerifyOob(node_i, nresult)
3262
3263       if nimg.vm_capable:
3264         self._VerifyNodeLVM(node_i, nresult, vg_name)
3265         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266                              all_drbd_map)
3267
3268         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3269         self._UpdateNodeInstances(node_i, nresult, nimg)
3270         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3271         self._UpdateNodeOS(node_i, nresult, nimg)
3272
3273         if not nimg.os_fail:
3274           if refos_img is None:
3275             refos_img = nimg
3276           self._VerifyNodeOS(node_i, nimg, refos_img)
3277         self._VerifyNodeBridges(node_i, nresult, bridges)
3278
3279         # Check whether all running instancies are primary for the node. (This
3280         # can no longer be done from _VerifyInstance below, since some of the
3281         # wrong instances could be from other node groups.)
3282         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3283
3284         for inst in non_primary_inst:
3285           # FIXME: investigate best way to handle offline insts
3286           if inst.admin_state == constants.ADMINST_OFFLINE:
3287             if verbose:
3288               feedback_fn("* Skipping offline instance %s" % inst.name)
3289             i_offline += 1
3290             continue
3291           test = inst in self.all_inst_info
3292           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3293                    "instance should not run on node %s", node_i.name)
3294           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3295                    "node is running unknown instance %s", inst)
3296
3297     for node, result in extra_lv_nvinfo.items():
3298       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3299                               node_image[node], vg_name)
3300
3301     feedback_fn("* Verifying instance status")
3302     for instance in self.my_inst_names:
3303       if verbose:
3304         feedback_fn("* Verifying instance %s" % instance)
3305       inst_config = self.my_inst_info[instance]
3306       self._VerifyInstance(instance, inst_config, node_image,
3307                            instdisk[instance])
3308       inst_nodes_offline = []
3309
3310       pnode = inst_config.primary_node
3311       pnode_img = node_image[pnode]
3312       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3313                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3314                " primary node failed", instance)
3315
3316       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3317                pnode_img.offline,
3318                constants.CV_EINSTANCEBADNODE, instance,
3319                "instance is marked as running and lives on offline node %s",
3320                inst_config.primary_node)
3321
3322       # If the instance is non-redundant we cannot survive losing its primary
3323       # node, so we are not N+1 compliant. On the other hand we have no disk
3324       # templates with more than one secondary so that situation is not well
3325       # supported either.
3326       # FIXME: does not support file-backed instances
3327       if not inst_config.secondary_nodes:
3328         i_non_redundant.append(instance)
3329
3330       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3331                constants.CV_EINSTANCELAYOUT,
3332                instance, "instance has multiple secondary nodes: %s",
3333                utils.CommaJoin(inst_config.secondary_nodes),
3334                code=self.ETYPE_WARNING)
3335
3336       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3337         pnode = inst_config.primary_node
3338         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3339         instance_groups = {}
3340
3341         for node in instance_nodes:
3342           instance_groups.setdefault(self.all_node_info[node].group,
3343                                      []).append(node)
3344
3345         pretty_list = [
3346           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3347           # Sort so that we always list the primary node first.
3348           for group, nodes in sorted(instance_groups.items(),
3349                                      key=lambda (_, nodes): pnode in nodes,
3350                                      reverse=True)]
3351
3352         self._ErrorIf(len(instance_groups) > 1,
3353                       constants.CV_EINSTANCESPLITGROUPS,
3354                       instance, "instance has primary and secondary nodes in"
3355                       " different groups: %s", utils.CommaJoin(pretty_list),
3356                       code=self.ETYPE_WARNING)
3357
3358       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3359         i_non_a_balanced.append(instance)
3360
3361       for snode in inst_config.secondary_nodes:
3362         s_img = node_image[snode]
3363         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3364                  snode, "instance %s, connection to secondary node failed",
3365                  instance)
3366
3367         if s_img.offline:
3368           inst_nodes_offline.append(snode)
3369
3370       # warn that the instance lives on offline nodes
3371       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3372                "instance has offline secondary node(s) %s",
3373                utils.CommaJoin(inst_nodes_offline))
3374       # ... or ghost/non-vm_capable nodes
3375       for node in inst_config.all_nodes:
3376         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3377                  instance, "instance lives on ghost node %s", node)
3378         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3379                  instance, "instance lives on non-vm_capable node %s", node)
3380
3381     feedback_fn("* Verifying orphan volumes")
3382     reserved = utils.FieldSet(*cluster.reserved_lvs)
3383
3384     # We will get spurious "unknown volume" warnings if any node of this group
3385     # is secondary for an instance whose primary is in another group. To avoid
3386     # them, we find these instances and add their volumes to node_vol_should.
3387     for inst in self.all_inst_info.values():
3388       for secondary in inst.secondary_nodes:
3389         if (secondary in self.my_node_info
3390             and inst.name not in self.my_inst_info):
3391           inst.MapLVsByNode(node_vol_should)
3392           break
3393
3394     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3395
3396     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3397       feedback_fn("* Verifying N+1 Memory redundancy")
3398       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3399
3400     feedback_fn("* Other Notes")
3401     if i_non_redundant:
3402       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3403                   % len(i_non_redundant))
3404
3405     if i_non_a_balanced:
3406       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3407                   % len(i_non_a_balanced))
3408
3409     if i_offline:
3410       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3411
3412     if n_offline:
3413       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3414
3415     if n_drained:
3416       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3417
3418     return not self.bad
3419
3420   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3421     """Analyze the post-hooks' result
3422
3423     This method analyses the hook result, handles it, and sends some
3424     nicely-formatted feedback back to the user.
3425
3426     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3427         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3428     @param hooks_results: the results of the multi-node hooks rpc call
3429     @param feedback_fn: function used send feedback back to the caller
3430     @param lu_result: previous Exec result
3431     @return: the new Exec result, based on the previous result
3432         and hook results
3433
3434     """
3435     # We only really run POST phase hooks, only for non-empty groups,
3436     # and are only interested in their results
3437     if not self.my_node_names:
3438       # empty node group
3439       pass
3440     elif phase == constants.HOOKS_PHASE_POST:
3441       # Used to change hooks' output to proper indentation
3442       feedback_fn("* Hooks Results")
3443       assert hooks_results, "invalid result from hooks"
3444
3445       for node_name in hooks_results:
3446         res = hooks_results[node_name]
3447         msg = res.fail_msg
3448         test = msg and not res.offline
3449         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450                       "Communication failure in hooks execution: %s", msg)
3451         if res.offline or msg:
3452           # No need to investigate payload if node is offline or gave
3453           # an error.
3454           continue
3455         for script, hkr, output in res.payload:
3456           test = hkr == constants.HKR_FAIL
3457           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458                         "Script %s failed, output:", script)
3459           if test:
3460             output = self._HOOKS_INDENT_RE.sub("      ", output)
3461             feedback_fn("%s" % output)
3462             lu_result = False
3463
3464     return lu_result
3465
3466
3467 class LUClusterVerifyDisks(NoHooksLU):
3468   """Verifies the cluster disks status.
3469
3470   """
3471   REQ_BGL = False
3472
3473   def ExpandNames(self):
3474     self.share_locks = _ShareAll()
3475     self.needed_locks = {
3476       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3477       }
3478
3479   def Exec(self, feedback_fn):
3480     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3481
3482     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3483     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3484                            for group in group_names])
3485
3486
3487 class LUGroupVerifyDisks(NoHooksLU):
3488   """Verifies the status of all disks in a node group.
3489
3490   """
3491   REQ_BGL = False
3492
3493   def ExpandNames(self):
3494     # Raises errors.OpPrereqError on its own if group can't be found
3495     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3496
3497     self.share_locks = _ShareAll()
3498     self.needed_locks = {
3499       locking.LEVEL_INSTANCE: [],
3500       locking.LEVEL_NODEGROUP: [],
3501       locking.LEVEL_NODE: [],
3502       }
3503
3504   def DeclareLocks(self, level):
3505     if level == locking.LEVEL_INSTANCE:
3506       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3507
3508       # Lock instances optimistically, needs verification once node and group
3509       # locks have been acquired
3510       self.needed_locks[locking.LEVEL_INSTANCE] = \
3511         self.cfg.GetNodeGroupInstances(self.group_uuid)
3512
3513     elif level == locking.LEVEL_NODEGROUP:
3514       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3515
3516       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3517         set([self.group_uuid] +
3518             # Lock all groups used by instances optimistically; this requires
3519             # going via the node before it's locked, requiring verification
3520             # later on
3521             [group_uuid
3522              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3523              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3524
3525     elif level == locking.LEVEL_NODE:
3526       # This will only lock the nodes in the group to be verified which contain
3527       # actual instances
3528       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3529       self._LockInstancesNodes()
3530
3531       # Lock all nodes in group to be verified
3532       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3533       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3534       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3535
3536   def CheckPrereq(self):
3537     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3538     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3539     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3540
3541     assert self.group_uuid in owned_groups
3542
3543     # Check if locked instances are still correct
3544     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3545
3546     # Get instance information
3547     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3548
3549     # Check if node groups for locked instances are still correct
3550     _CheckInstancesNodeGroups(self.cfg, self.instances,
3551                               owned_groups, owned_nodes, self.group_uuid)
3552
3553   def Exec(self, feedback_fn):
3554     """Verify integrity of cluster disks.
3555
3556     @rtype: tuple of three items
3557     @return: a tuple of (dict of node-to-node_error, list of instances
3558         which need activate-disks, dict of instance: (node, volume) for
3559         missing volumes
3560
3561     """
3562     res_nodes = {}
3563     res_instances = set()
3564     res_missing = {}
3565
3566     nv_dict = _MapInstanceDisksToNodes([inst
3567             for inst in self.instances.values()
3568             if inst.admin_state == constants.ADMINST_UP])
3569
3570     if nv_dict:
3571       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3572                              set(self.cfg.GetVmCapableNodeList()))
3573
3574       node_lvs = self.rpc.call_lv_list(nodes, [])
3575
3576       for (node, node_res) in node_lvs.items():
3577         if node_res.offline:
3578           continue
3579
3580         msg = node_res.fail_msg
3581         if msg:
3582           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3583           res_nodes[node] = msg
3584           continue
3585
3586         for lv_name, (_, _, lv_online) in node_res.payload.items():
3587           inst = nv_dict.pop((node, lv_name), None)
3588           if not (lv_online or inst is None):
3589             res_instances.add(inst)
3590
3591       # any leftover items in nv_dict are missing LVs, let's arrange the data
3592       # better
3593       for key, inst in nv_dict.iteritems():
3594         res_missing.setdefault(inst, []).append(list(key))
3595
3596     return (res_nodes, list(res_instances), res_missing)
3597
3598
3599 class LUClusterRepairDiskSizes(NoHooksLU):
3600   """Verifies the cluster disks sizes.
3601
3602   """
3603   REQ_BGL = False
3604
3605   def ExpandNames(self):
3606     if self.op.instances:
3607       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3608       self.needed_locks = {
3609         locking.LEVEL_NODE_RES: [],
3610         locking.LEVEL_INSTANCE: self.wanted_names,
3611         }
3612       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3613     else:
3614       self.wanted_names = None
3615       self.needed_locks = {
3616         locking.LEVEL_NODE_RES: locking.ALL_SET,
3617         locking.LEVEL_INSTANCE: locking.ALL_SET,
3618         }
3619     self.share_locks = {
3620       locking.LEVEL_NODE_RES: 1,
3621       locking.LEVEL_INSTANCE: 0,
3622       }
3623
3624   def DeclareLocks(self, level):
3625     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3626       self._LockInstancesNodes(primary_only=True, level=level)
3627
3628   def CheckPrereq(self):
3629     """Check prerequisites.
3630
3631     This only checks the optional instance list against the existing names.
3632
3633     """
3634     if self.wanted_names is None:
3635       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3636
3637     self.wanted_instances = \
3638         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3639
3640   def _EnsureChildSizes(self, disk):
3641     """Ensure children of the disk have the needed disk size.
3642
3643     This is valid mainly for DRBD8 and fixes an issue where the
3644     children have smaller disk size.
3645
3646     @param disk: an L{ganeti.objects.Disk} object
3647
3648     """
3649     if disk.dev_type == constants.LD_DRBD8:
3650       assert disk.children, "Empty children for DRBD8?"
3651       fchild = disk.children[0]
3652       mismatch = fchild.size < disk.size
3653       if mismatch:
3654         self.LogInfo("Child disk has size %d, parent %d, fixing",
3655                      fchild.size, disk.size)
3656         fchild.size = disk.size
3657
3658       # and we recurse on this child only, not on the metadev
3659       return self._EnsureChildSizes(fchild) or mismatch
3660     else:
3661       return False
3662
3663   def Exec(self, feedback_fn):
3664     """Verify the size of cluster disks.
3665
3666     """
3667     # TODO: check child disks too
3668     # TODO: check differences in size between primary/secondary nodes
3669     per_node_disks = {}
3670     for instance in self.wanted_instances:
3671       pnode = instance.primary_node
3672       if pnode not in per_node_disks:
3673         per_node_disks[pnode] = []
3674       for idx, disk in enumerate(instance.disks):
3675         per_node_disks[pnode].append((instance, idx, disk))
3676
3677     assert not (frozenset(per_node_disks.keys()) -
3678                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3679       "Not owning correct locks"
3680     assert not self.owned_locks(locking.LEVEL_NODE)
3681
3682     changed = []
3683     for node, dskl in per_node_disks.items():
3684       newl = [v[2].Copy() for v in dskl]
3685       for dsk in newl:
3686         self.cfg.SetDiskID(dsk, node)
3687       result = self.rpc.call_blockdev_getsize(node, newl)
3688       if result.fail_msg:
3689         self.LogWarning("Failure in blockdev_getsize call to node"
3690                         " %s, ignoring", node)
3691         continue
3692       if len(result.payload) != len(dskl):
3693         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3694                         " result.payload=%s", node, len(dskl), result.payload)
3695         self.LogWarning("Invalid result from node %s, ignoring node results",
3696                         node)
3697         continue
3698       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3699         if size is None:
3700           self.LogWarning("Disk %d of instance %s did not return size"
3701                           " information, ignoring", idx, instance.name)
3702           continue
3703         if not isinstance(size, (int, long)):
3704           self.LogWarning("Disk %d of instance %s did not return valid"
3705                           " size information, ignoring", idx, instance.name)
3706           continue
3707         size = size >> 20
3708         if size != disk.size:
3709           self.LogInfo("Disk %d of instance %s has mismatched size,"
3710                        " correcting: recorded %d, actual %d", idx,
3711                        instance.name, disk.size, size)
3712           disk.size = size
3713           self.cfg.Update(instance, feedback_fn)
3714           changed.append((instance.name, idx, size))
3715         if self._EnsureChildSizes(disk):
3716           self.cfg.Update(instance, feedback_fn)
3717           changed.append((instance.name, idx, disk.size))
3718     return changed
3719
3720
3721 class LUClusterRename(LogicalUnit):
3722   """Rename the cluster.
3723
3724   """
3725   HPATH = "cluster-rename"
3726   HTYPE = constants.HTYPE_CLUSTER
3727
3728   def BuildHooksEnv(self):
3729     """Build hooks env.
3730
3731     """
3732     return {
3733       "OP_TARGET": self.cfg.GetClusterName(),
3734       "NEW_NAME": self.op.name,
3735       }
3736
3737   def BuildHooksNodes(self):
3738     """Build hooks nodes.
3739
3740     """
3741     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3742
3743   def CheckPrereq(self):
3744     """Verify that the passed name is a valid one.
3745
3746     """
3747     hostname = netutils.GetHostname(name=self.op.name,
3748                                     family=self.cfg.GetPrimaryIPFamily())
3749
3750     new_name = hostname.name
3751     self.ip = new_ip = hostname.ip
3752     old_name = self.cfg.GetClusterName()
3753     old_ip = self.cfg.GetMasterIP()
3754     if new_name == old_name and new_ip == old_ip:
3755       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3756                                  " cluster has changed",
3757                                  errors.ECODE_INVAL)
3758     if new_ip != old_ip:
3759       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3760         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3761                                    " reachable on the network" %
3762                                    new_ip, errors.ECODE_NOTUNIQUE)
3763
3764     self.op.name = new_name
3765
3766   def Exec(self, feedback_fn):
3767     """Rename the cluster.
3768
3769     """
3770     clustername = self.op.name
3771     new_ip = self.ip
3772
3773     # shutdown the master IP
3774     master_params = self.cfg.GetMasterNetworkParameters()
3775     ems = self.cfg.GetUseExternalMipScript()
3776     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3777                                                      master_params, ems)
3778     result.Raise("Could not disable the master role")
3779
3780     try:
3781       cluster = self.cfg.GetClusterInfo()
3782       cluster.cluster_name = clustername
3783       cluster.master_ip = new_ip
3784       self.cfg.Update(cluster, feedback_fn)
3785
3786       # update the known hosts file
3787       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3788       node_list = self.cfg.GetOnlineNodeList()
3789       try:
3790         node_list.remove(master_params.name)
3791       except ValueError:
3792         pass
3793       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3794     finally:
3795       master_params.ip = new_ip
3796       result = self.rpc.call_node_activate_master_ip(master_params.name,
3797                                                      master_params, ems)
3798       msg = result.fail_msg
3799       if msg:
3800         self.LogWarning("Could not re-enable the master role on"
3801                         " the master, please restart manually: %s", msg)
3802
3803     return clustername
3804
3805
3806 def _ValidateNetmask(cfg, netmask):
3807   """Checks if a netmask is valid.
3808
3809   @type cfg: L{config.ConfigWriter}
3810   @param cfg: The cluster configuration
3811   @type netmask: int
3812   @param netmask: the netmask to be verified
3813   @raise errors.OpPrereqError: if the validation fails
3814
3815   """
3816   ip_family = cfg.GetPrimaryIPFamily()
3817   try:
3818     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3819   except errors.ProgrammerError:
3820     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3821                                ip_family)
3822   if not ipcls.ValidateNetmask(netmask):
3823     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3824                                 (netmask))
3825
3826
3827 class LUClusterSetParams(LogicalUnit):
3828   """Change the parameters of the cluster.
3829
3830   """
3831   HPATH = "cluster-modify"
3832   HTYPE = constants.HTYPE_CLUSTER
3833   REQ_BGL = False
3834
3835   def CheckArguments(self):
3836     """Check parameters
3837
3838     """
3839     if self.op.uid_pool:
3840       uidpool.CheckUidPool(self.op.uid_pool)
3841
3842     if self.op.add_uids:
3843       uidpool.CheckUidPool(self.op.add_uids)
3844
3845     if self.op.remove_uids:
3846       uidpool.CheckUidPool(self.op.remove_uids)
3847
3848     if self.op.master_netmask is not None:
3849       _ValidateNetmask(self.cfg, self.op.master_netmask)
3850
3851     if self.op.diskparams:
3852       for dt_params in self.op.diskparams.values():
3853         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3854
3855   def ExpandNames(self):
3856     # FIXME: in the future maybe other cluster params won't require checking on
3857     # all nodes to be modified.
3858     self.needed_locks = {
3859       locking.LEVEL_NODE: locking.ALL_SET,
3860       locking.LEVEL_INSTANCE: locking.ALL_SET,
3861       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3862     }
3863     self.share_locks = {
3864         locking.LEVEL_NODE: 1,
3865         locking.LEVEL_INSTANCE: 1,
3866         locking.LEVEL_NODEGROUP: 1,
3867     }
3868
3869   def BuildHooksEnv(self):
3870     """Build hooks env.
3871
3872     """
3873     return {
3874       "OP_TARGET": self.cfg.GetClusterName(),
3875       "NEW_VG_NAME": self.op.vg_name,
3876       }
3877
3878   def BuildHooksNodes(self):
3879     """Build hooks nodes.
3880
3881     """
3882     mn = self.cfg.GetMasterNode()
3883     return ([mn], [mn])
3884
3885   def CheckPrereq(self):
3886     """Check prerequisites.
3887
3888     This checks whether the given params don't conflict and
3889     if the given volume group is valid.
3890
3891     """
3892     if self.op.vg_name is not None and not self.op.vg_name:
3893       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3894         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3895                                    " instances exist", errors.ECODE_INVAL)
3896
3897     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3898       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3899         raise errors.OpPrereqError("Cannot disable drbd helper while"
3900                                    " drbd-based instances exist",
3901                                    errors.ECODE_INVAL)
3902
3903     node_list = self.owned_locks(locking.LEVEL_NODE)
3904
3905     # if vg_name not None, checks given volume group on all nodes
3906     if self.op.vg_name:
3907       vglist = self.rpc.call_vg_list(node_list)
3908       for node in node_list:
3909         msg = vglist[node].fail_msg
3910         if msg:
3911           # ignoring down node
3912           self.LogWarning("Error while gathering data on node %s"
3913                           " (ignoring node): %s", node, msg)
3914           continue
3915         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3916                                               self.op.vg_name,
3917                                               constants.MIN_VG_SIZE)
3918         if vgstatus:
3919           raise errors.OpPrereqError("Error on node '%s': %s" %
3920                                      (node, vgstatus), errors.ECODE_ENVIRON)
3921
3922     if self.op.drbd_helper:
3923       # checks given drbd helper on all nodes
3924       helpers = self.rpc.call_drbd_helper(node_list)
3925       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3926         if ninfo.offline:
3927           self.LogInfo("Not checking drbd helper on offline node %s", node)
3928           continue
3929         msg = helpers[node].fail_msg
3930         if msg:
3931           raise errors.OpPrereqError("Error checking drbd helper on node"
3932                                      " '%s': %s" % (node, msg),
3933                                      errors.ECODE_ENVIRON)
3934         node_helper = helpers[node].payload
3935         if node_helper != self.op.drbd_helper:
3936           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3937                                      (node, node_helper), errors.ECODE_ENVIRON)
3938
3939     self.cluster = cluster = self.cfg.GetClusterInfo()
3940     # validate params changes
3941     if self.op.beparams:
3942       objects.UpgradeBeParams(self.op.beparams)
3943       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3944       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3945
3946     if self.op.ndparams:
3947       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3948       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3949
3950       # TODO: we need a more general way to handle resetting
3951       # cluster-level parameters to default values
3952       if self.new_ndparams["oob_program"] == "":
3953         self.new_ndparams["oob_program"] = \
3954             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3955
3956     if self.op.hv_state:
3957       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3958                                             self.cluster.hv_state_static)
3959       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3960                                for hv, values in new_hv_state.items())
3961
3962     if self.op.disk_state:
3963       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3964                                                 self.cluster.disk_state_static)
3965       self.new_disk_state = \
3966         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3967                             for name, values in svalues.items()))
3968              for storage, svalues in new_disk_state.items())
3969
3970     if self.op.ipolicy:
3971       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3972                                             group_policy=False)
3973
3974       all_instances = self.cfg.GetAllInstancesInfo().values()
3975       violations = set()
3976       for group in self.cfg.GetAllNodeGroupsInfo().values():
3977         instances = frozenset([inst for inst in all_instances
3978                                if compat.any(node in group.members
3979                                              for node in inst.all_nodes)])
3980         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3981         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3982                                                                    group),
3983                                             new_ipolicy, instances)
3984         if new:
3985           violations.update(new)
3986
3987       if violations:
3988         self.LogWarning("After the ipolicy change the following instances"
3989                         " violate them: %s",
3990                         utils.CommaJoin(violations))
3991
3992     if self.op.nicparams:
3993       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3994       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3995       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3996       nic_errors = []
3997
3998       # check all instances for consistency
3999       for instance in self.cfg.GetAllInstancesInfo().values():
4000         for nic_idx, nic in enumerate(instance.nics):
4001           params_copy = copy.deepcopy(nic.nicparams)
4002           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4003
4004           # check parameter syntax
4005           try:
4006             objects.NIC.CheckParameterSyntax(params_filled)
4007           except errors.ConfigurationError, err:
4008             nic_errors.append("Instance %s, nic/%d: %s" %
4009                               (instance.name, nic_idx, err))
4010
4011           # if we're moving instances to routed, check that they have an ip
4012           target_mode = params_filled[constants.NIC_MODE]
4013           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4014             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4015                               " address" % (instance.name, nic_idx))
4016       if nic_errors:
4017         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4018                                    "\n".join(nic_errors))
4019
4020     # hypervisor list/parameters
4021     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4022     if self.op.hvparams:
4023       for hv_name, hv_dict in self.op.hvparams.items():
4024         if hv_name not in self.new_hvparams:
4025           self.new_hvparams[hv_name] = hv_dict
4026         else:
4027           self.new_hvparams[hv_name].update(hv_dict)
4028
4029     # disk template parameters
4030     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4031     if self.op.diskparams:
4032       for dt_name, dt_params in self.op.diskparams.items():
4033         if dt_name not in self.op.diskparams:
4034           self.new_diskparams[dt_name] = dt_params
4035         else:
4036           self.new_diskparams[dt_name].update(dt_params)
4037
4038     # os hypervisor parameters
4039     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4040     if self.op.os_hvp:
4041       for os_name, hvs in self.op.os_hvp.items():
4042         if os_name not in self.new_os_hvp:
4043           self.new_os_hvp[os_name] = hvs
4044         else:
4045           for hv_name, hv_dict in hvs.items():
4046             if hv_name not in self.new_os_hvp[os_name]:
4047               self.new_os_hvp[os_name][hv_name] = hv_dict
4048             else:
4049               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4050
4051     # os parameters
4052     self.new_osp = objects.FillDict(cluster.osparams, {})
4053     if self.op.osparams:
4054       for os_name, osp in self.op.osparams.items():
4055         if os_name not in self.new_osp:
4056           self.new_osp[os_name] = {}
4057
4058         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4059                                                   use_none=True)
4060
4061         if not self.new_osp[os_name]:
4062           # we removed all parameters
4063           del self.new_osp[os_name]
4064         else:
4065           # check the parameter validity (remote check)
4066           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4067                          os_name, self.new_osp[os_name])
4068
4069     # changes to the hypervisor list
4070     if self.op.enabled_hypervisors is not None:
4071       self.hv_list = self.op.enabled_hypervisors
4072       for hv in self.hv_list:
4073         # if the hypervisor doesn't already exist in the cluster
4074         # hvparams, we initialize it to empty, and then (in both
4075         # cases) we make sure to fill the defaults, as we might not
4076         # have a complete defaults list if the hypervisor wasn't
4077         # enabled before
4078         if hv not in new_hvp:
4079           new_hvp[hv] = {}
4080         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4081         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4082     else:
4083       self.hv_list = cluster.enabled_hypervisors
4084
4085     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4086       # either the enabled list has changed, or the parameters have, validate
4087       for hv_name, hv_params in self.new_hvparams.items():
4088         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4089             (self.op.enabled_hypervisors and
4090              hv_name in self.op.enabled_hypervisors)):
4091           # either this is a new hypervisor, or its parameters have changed
4092           hv_class = hypervisor.GetHypervisor(hv_name)
4093           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4094           hv_class.CheckParameterSyntax(hv_params)
4095           _CheckHVParams(self, node_list, hv_name, hv_params)
4096
4097     if self.op.os_hvp:
4098       # no need to check any newly-enabled hypervisors, since the
4099       # defaults have already been checked in the above code-block
4100       for os_name, os_hvp in self.new_os_hvp.items():
4101         for hv_name, hv_params in os_hvp.items():
4102           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4103           # we need to fill in the new os_hvp on top of the actual hv_p
4104           cluster_defaults = self.new_hvparams.get(hv_name, {})
4105           new_osp = objects.FillDict(cluster_defaults, hv_params)
4106           hv_class = hypervisor.GetHypervisor(hv_name)
4107           hv_class.CheckParameterSyntax(new_osp)
4108           _CheckHVParams(self, node_list, hv_name, new_osp)
4109
4110     if self.op.default_iallocator:
4111       alloc_script = utils.FindFile(self.op.default_iallocator,
4112                                     constants.IALLOCATOR_SEARCH_PATH,
4113                                     os.path.isfile)
4114       if alloc_script is None:
4115         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4116                                    " specified" % self.op.default_iallocator,
4117                                    errors.ECODE_INVAL)
4118
4119   def Exec(self, feedback_fn):
4120     """Change the parameters of the cluster.
4121
4122     """
4123     if self.op.vg_name is not None:
4124       new_volume = self.op.vg_name
4125       if not new_volume:
4126         new_volume = None
4127       if new_volume != self.cfg.GetVGName():
4128         self.cfg.SetVGName(new_volume)
4129       else:
4130         feedback_fn("Cluster LVM configuration already in desired"
4131                     " state, not changing")
4132     if self.op.drbd_helper is not None:
4133       new_helper = self.op.drbd_helper
4134       if not new_helper:
4135         new_helper = None
4136       if new_helper != self.cfg.GetDRBDHelper():
4137         self.cfg.SetDRBDHelper(new_helper)
4138       else:
4139         feedback_fn("Cluster DRBD helper already in desired state,"
4140                     " not changing")
4141     if self.op.hvparams:
4142       self.cluster.hvparams = self.new_hvparams
4143     if self.op.os_hvp:
4144       self.cluster.os_hvp = self.new_os_hvp
4145     if self.op.enabled_hypervisors is not None:
4146       self.cluster.hvparams = self.new_hvparams
4147       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4148     if self.op.beparams:
4149       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4150     if self.op.nicparams:
4151       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4152     if self.op.ipolicy:
4153       self.cluster.ipolicy = self.new_ipolicy
4154     if self.op.osparams:
4155       self.cluster.osparams = self.new_osp
4156     if self.op.ndparams:
4157       self.cluster.ndparams = self.new_ndparams
4158     if self.op.diskparams:
4159       self.cluster.diskparams = self.new_diskparams
4160     if self.op.hv_state:
4161       self.cluster.hv_state_static = self.new_hv_state
4162     if self.op.disk_state:
4163       self.cluster.disk_state_static = self.new_disk_state
4164
4165     if self.op.candidate_pool_size is not None:
4166       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4167       # we need to update the pool size here, otherwise the save will fail
4168       _AdjustCandidatePool(self, [])
4169
4170     if self.op.maintain_node_health is not None:
4171       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4172         feedback_fn("Note: CONFD was disabled at build time, node health"
4173                     " maintenance is not useful (still enabling it)")
4174       self.cluster.maintain_node_health = self.op.maintain_node_health
4175
4176     if self.op.prealloc_wipe_disks is not None:
4177       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4178
4179     if self.op.add_uids is not None:
4180       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4181
4182     if self.op.remove_uids is not None:
4183       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4184
4185     if self.op.uid_pool is not None:
4186       self.cluster.uid_pool = self.op.uid_pool
4187
4188     if self.op.default_iallocator is not None:
4189       self.cluster.default_iallocator = self.op.default_iallocator
4190
4191     if self.op.reserved_lvs is not None:
4192       self.cluster.reserved_lvs = self.op.reserved_lvs
4193
4194     if self.op.use_external_mip_script is not None:
4195       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4196
4197     def helper_os(aname, mods, desc):
4198       desc += " OS list"
4199       lst = getattr(self.cluster, aname)
4200       for key, val in mods:
4201         if key == constants.DDM_ADD:
4202           if val in lst:
4203             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4204           else:
4205             lst.append(val)
4206         elif key == constants.DDM_REMOVE:
4207           if val in lst:
4208             lst.remove(val)
4209           else:
4210             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4211         else:
4212           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4213
4214     if self.op.hidden_os:
4215       helper_os("hidden_os", self.op.hidden_os, "hidden")
4216
4217     if self.op.blacklisted_os:
4218       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4219
4220     if self.op.master_netdev:
4221       master_params = self.cfg.GetMasterNetworkParameters()
4222       ems = self.cfg.GetUseExternalMipScript()
4223       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4224                   self.cluster.master_netdev)
4225       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4226                                                        master_params, ems)
4227       result.Raise("Could not disable the master ip")
4228       feedback_fn("Changing master_netdev from %s to %s" %
4229                   (master_params.netdev, self.op.master_netdev))
4230       self.cluster.master_netdev = self.op.master_netdev
4231
4232     if self.op.master_netmask:
4233       master_params = self.cfg.GetMasterNetworkParameters()
4234       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4235       result = self.rpc.call_node_change_master_netmask(master_params.name,
4236                                                         master_params.netmask,
4237                                                         self.op.master_netmask,
4238                                                         master_params.ip,
4239                                                         master_params.netdev)
4240       if result.fail_msg:
4241         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4242         feedback_fn(msg)
4243
4244       self.cluster.master_netmask = self.op.master_netmask
4245
4246     self.cfg.Update(self.cluster, feedback_fn)
4247
4248     if self.op.master_netdev:
4249       master_params = self.cfg.GetMasterNetworkParameters()
4250       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4251                   self.op.master_netdev)
4252       ems = self.cfg.GetUseExternalMipScript()
4253       result = self.rpc.call_node_activate_master_ip(master_params.name,
4254                                                      master_params, ems)
4255       if result.fail_msg:
4256         self.LogWarning("Could not re-enable the master ip on"
4257                         " the master, please restart manually: %s",
4258                         result.fail_msg)
4259
4260
4261 def _UploadHelper(lu, nodes, fname):
4262   """Helper for uploading a file and showing warnings.
4263
4264   """
4265   if os.path.exists(fname):
4266     result = lu.rpc.call_upload_file(nodes, fname)
4267     for to_node, to_result in result.items():
4268       msg = to_result.fail_msg
4269       if msg:
4270         msg = ("Copy of file %s to node %s failed: %s" %
4271                (fname, to_node, msg))
4272         lu.proc.LogWarning(msg)
4273
4274
4275 def _ComputeAncillaryFiles(cluster, redist):
4276   """Compute files external to Ganeti which need to be consistent.
4277
4278   @type redist: boolean
4279   @param redist: Whether to include files which need to be redistributed
4280
4281   """
4282   # Compute files for all nodes
4283   files_all = set([
4284     constants.SSH_KNOWN_HOSTS_FILE,
4285     constants.CONFD_HMAC_KEY,
4286     constants.CLUSTER_DOMAIN_SECRET_FILE,
4287     constants.SPICE_CERT_FILE,
4288     constants.SPICE_CACERT_FILE,
4289     constants.RAPI_USERS_FILE,
4290     ])
4291
4292   if not redist:
4293     files_all.update(constants.ALL_CERT_FILES)
4294     files_all.update(ssconf.SimpleStore().GetFileList())
4295   else:
4296     # we need to ship at least the RAPI certificate
4297     files_all.add(constants.RAPI_CERT_FILE)
4298
4299   if cluster.modify_etc_hosts:
4300     files_all.add(constants.ETC_HOSTS)
4301
4302   # Files which are optional, these must:
4303   # - be present in one other category as well
4304   # - either exist or not exist on all nodes of that category (mc, vm all)
4305   files_opt = set([
4306     constants.RAPI_USERS_FILE,
4307     ])
4308
4309   # Files which should only be on master candidates
4310   files_mc = set()
4311
4312   if not redist:
4313     files_mc.add(constants.CLUSTER_CONF_FILE)
4314
4315     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4316     # replication
4317     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4318
4319   # Files which should only be on VM-capable nodes
4320   files_vm = set(filename
4321     for hv_name in cluster.enabled_hypervisors
4322     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4323
4324   files_opt |= set(filename
4325     for hv_name in cluster.enabled_hypervisors
4326     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4327
4328   # Filenames in each category must be unique
4329   all_files_set = files_all | files_mc | files_vm
4330   assert (len(all_files_set) ==
4331           sum(map(len, [files_all, files_mc, files_vm]))), \
4332          "Found file listed in more than one file list"
4333
4334   # Optional files must be present in one other category
4335   assert all_files_set.issuperset(files_opt), \
4336          "Optional file not in a different required list"
4337
4338   return (files_all, files_opt, files_mc, files_vm)
4339
4340
4341 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4342   """Distribute additional files which are part of the cluster configuration.
4343
4344   ConfigWriter takes care of distributing the config and ssconf files, but
4345   there are more files which should be distributed to all nodes. This function
4346   makes sure those are copied.
4347
4348   @param lu: calling logical unit
4349   @param additional_nodes: list of nodes not in the config to distribute to
4350   @type additional_vm: boolean
4351   @param additional_vm: whether the additional nodes are vm-capable or not
4352
4353   """
4354   # Gather target nodes
4355   cluster = lu.cfg.GetClusterInfo()
4356   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4357
4358   online_nodes = lu.cfg.GetOnlineNodeList()
4359   vm_nodes = lu.cfg.GetVmCapableNodeList()
4360
4361   if additional_nodes is not None:
4362     online_nodes.extend(additional_nodes)
4363     if additional_vm:
4364       vm_nodes.extend(additional_nodes)
4365
4366   # Never distribute to master node
4367   for nodelist in [online_nodes, vm_nodes]:
4368     if master_info.name in nodelist:
4369       nodelist.remove(master_info.name)
4370
4371   # Gather file lists
4372   (files_all, _, files_mc, files_vm) = \
4373     _ComputeAncillaryFiles(cluster, True)
4374
4375   # Never re-distribute configuration file from here
4376   assert not (constants.CLUSTER_CONF_FILE in files_all or
4377               constants.CLUSTER_CONF_FILE in files_vm)
4378   assert not files_mc, "Master candidates not handled in this function"
4379
4380   filemap = [
4381     (online_nodes, files_all),
4382     (vm_nodes, files_vm),
4383     ]
4384
4385   # Upload the files
4386   for (node_list, files) in filemap:
4387     for fname in files:
4388       _UploadHelper(lu, node_list, fname)
4389
4390
4391 class LUClusterRedistConf(NoHooksLU):
4392   """Force the redistribution of cluster configuration.
4393
4394   This is a very simple LU.
4395
4396   """
4397   REQ_BGL = False
4398
4399   def ExpandNames(self):
4400     self.needed_locks = {
4401       locking.LEVEL_NODE: locking.ALL_SET,
4402     }
4403     self.share_locks[locking.LEVEL_NODE] = 1
4404
4405   def Exec(self, feedback_fn):
4406     """Redistribute the configuration.
4407
4408     """
4409     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4410     _RedistributeAncillaryFiles(self)
4411
4412
4413 class LUClusterActivateMasterIp(NoHooksLU):
4414   """Activate the master IP on the master node.
4415
4416   """
4417   def Exec(self, feedback_fn):
4418     """Activate the master IP.
4419
4420     """
4421     master_params = self.cfg.GetMasterNetworkParameters()
4422     ems = self.cfg.GetUseExternalMipScript()
4423     result = self.rpc.call_node_activate_master_ip(master_params.name,
4424                                                    master_params, ems)
4425     result.Raise("Could not activate the master IP")
4426
4427
4428 class LUClusterDeactivateMasterIp(NoHooksLU):
4429   """Deactivate the master IP on the master node.
4430
4431   """
4432   def Exec(self, feedback_fn):
4433     """Deactivate the master IP.
4434
4435     """
4436     master_params = self.cfg.GetMasterNetworkParameters()
4437     ems = self.cfg.GetUseExternalMipScript()
4438     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4439                                                      master_params, ems)
4440     result.Raise("Could not deactivate the master IP")
4441
4442
4443 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4444   """Sleep and poll for an instance's disk to sync.
4445
4446   """
4447   if not instance.disks or disks is not None and not disks:
4448     return True
4449
4450   disks = _ExpandCheckDisks(instance, disks)
4451
4452   if not oneshot:
4453     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4454
4455   node = instance.primary_node
4456
4457   for dev in disks:
4458     lu.cfg.SetDiskID(dev, node)
4459
4460   # TODO: Convert to utils.Retry
4461
4462   retries = 0
4463   degr_retries = 10 # in seconds, as we sleep 1 second each time
4464   while True:
4465     max_time = 0
4466     done = True
4467     cumul_degraded = False
4468     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4469     msg = rstats.fail_msg
4470     if msg:
4471       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4472       retries += 1
4473       if retries >= 10:
4474         raise errors.RemoteError("Can't contact node %s for mirror data,"
4475                                  " aborting." % node)
4476       time.sleep(6)
4477       continue
4478     rstats = rstats.payload
4479     retries = 0
4480     for i, mstat in enumerate(rstats):
4481       if mstat is None:
4482         lu.LogWarning("Can't compute data for node %s/%s",
4483                            node, disks[i].iv_name)
4484         continue
4485
4486       cumul_degraded = (cumul_degraded or
4487                         (mstat.is_degraded and mstat.sync_percent is None))
4488       if mstat.sync_percent is not None:
4489         done = False
4490         if mstat.estimated_time is not None:
4491           rem_time = ("%s remaining (estimated)" %
4492                       utils.FormatSeconds(mstat.estimated_time))
4493           max_time = mstat.estimated_time
4494         else:
4495           rem_time = "no time estimate"
4496         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4497                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4498
4499     # if we're done but degraded, let's do a few small retries, to
4500     # make sure we see a stable and not transient situation; therefore
4501     # we force restart of the loop
4502     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4503       logging.info("Degraded disks found, %d retries left", degr_retries)
4504       degr_retries -= 1
4505       time.sleep(1)
4506       continue
4507
4508     if done or oneshot:
4509       break
4510
4511     time.sleep(min(60, max_time))
4512
4513   if done:
4514     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4515   return not cumul_degraded
4516
4517
4518 def _BlockdevFind(lu, node, dev, instance):
4519   """Wrapper around call_blockdev_find to annotate diskparams.
4520
4521   @param lu: A reference to the lu object
4522   @param node: The node to call out
4523   @param dev: The device to find
4524   @param instance: The instance object the device belongs to
4525   @returns The result of the rpc call
4526
4527   """
4528   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4529   return lu.rpc.call_blockdev_find(node, disk)
4530
4531
4532 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4533   """Wrapper around L{_CheckDiskConsistencyInner}.
4534
4535   """
4536   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4538                                     ldisk=ldisk)
4539
4540
4541 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4542                                ldisk=False):
4543   """Check that mirrors are not degraded.
4544
4545   @attention: The device has to be annotated already.
4546
4547   The ldisk parameter, if True, will change the test from the
4548   is_degraded attribute (which represents overall non-ok status for
4549   the device(s)) to the ldisk (representing the local storage status).
4550
4551   """
4552   lu.cfg.SetDiskID(dev, node)
4553
4554   result = True
4555
4556   if on_primary or dev.AssembleOnSecondary():
4557     rstats = lu.rpc.call_blockdev_find(node, dev)
4558     msg = rstats.fail_msg
4559     if msg:
4560       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4561       result = False
4562     elif not rstats.payload:
4563       lu.LogWarning("Can't find disk on node %s", node)
4564       result = False
4565     else:
4566       if ldisk:
4567         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4568       else:
4569         result = result and not rstats.payload.is_degraded
4570
4571   if dev.children:
4572     for child in dev.children:
4573       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4574                                                      on_primary)
4575
4576   return result
4577
4578
4579 class LUOobCommand(NoHooksLU):
4580   """Logical unit for OOB handling.
4581
4582   """
4583   REQ_BGL = False
4584   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4585
4586   def ExpandNames(self):
4587     """Gather locks we need.
4588
4589     """
4590     if self.op.node_names:
4591       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4592       lock_names = self.op.node_names
4593     else:
4594       lock_names = locking.ALL_SET
4595
4596     self.needed_locks = {
4597       locking.LEVEL_NODE: lock_names,
4598       }
4599
4600   def CheckPrereq(self):
4601     """Check prerequisites.
4602
4603     This checks:
4604      - the node exists in the configuration
4605      - OOB is supported
4606
4607     Any errors are signaled by raising errors.OpPrereqError.
4608
4609     """
4610     self.nodes = []
4611     self.master_node = self.cfg.GetMasterNode()
4612
4613     assert self.op.power_delay >= 0.0
4614
4615     if self.op.node_names:
4616       if (self.op.command in self._SKIP_MASTER and
4617           self.master_node in self.op.node_names):
4618         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4619         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4620
4621         if master_oob_handler:
4622           additional_text = ("run '%s %s %s' if you want to operate on the"
4623                              " master regardless") % (master_oob_handler,
4624                                                       self.op.command,
4625                                                       self.master_node)
4626         else:
4627           additional_text = "it does not support out-of-band operations"
4628
4629         raise errors.OpPrereqError(("Operating on the master node %s is not"
4630                                     " allowed for %s; %s") %
4631                                    (self.master_node, self.op.command,
4632                                     additional_text), errors.ECODE_INVAL)
4633     else:
4634       self.op.node_names = self.cfg.GetNodeList()
4635       if self.op.command in self._SKIP_MASTER:
4636         self.op.node_names.remove(self.master_node)
4637
4638     if self.op.command in self._SKIP_MASTER:
4639       assert self.master_node not in self.op.node_names
4640
4641     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4642       if node is None:
4643         raise errors.OpPrereqError("Node %s not found" % node_name,
4644                                    errors.ECODE_NOENT)
4645       else:
4646         self.nodes.append(node)
4647
4648       if (not self.op.ignore_status and
4649           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4650         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4651                                     " not marked offline") % node_name,
4652                                    errors.ECODE_STATE)
4653
4654   def Exec(self, feedback_fn):
4655     """Execute OOB and return result if we expect any.
4656
4657     """
4658     master_node = self.master_node
4659     ret = []
4660
4661     for idx, node in enumerate(utils.NiceSort(self.nodes,
4662                                               key=lambda node: node.name)):
4663       node_entry = [(constants.RS_NORMAL, node.name)]
4664       ret.append(node_entry)
4665
4666       oob_program = _SupportsOob(self.cfg, node)
4667
4668       if not oob_program:
4669         node_entry.append((constants.RS_UNAVAIL, None))
4670         continue
4671
4672       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4673                    self.op.command, oob_program, node.name)
4674       result = self.rpc.call_run_oob(master_node, oob_program,
4675                                      self.op.command, node.name,
4676                                      self.op.timeout)
4677
4678       if result.fail_msg:
4679         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4680                         node.name, result.fail_msg)
4681         node_entry.append((constants.RS_NODATA, None))
4682       else:
4683         try:
4684           self._CheckPayload(result)
4685         except errors.OpExecError, err:
4686           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4687                           node.name, err)
4688           node_entry.append((constants.RS_NODATA, None))
4689         else:
4690           if self.op.command == constants.OOB_HEALTH:
4691             # For health we should log important events
4692             for item, status in result.payload:
4693               if status in [constants.OOB_STATUS_WARNING,
4694                             constants.OOB_STATUS_CRITICAL]:
4695                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4696                                 item, node.name, status)
4697
4698           if self.op.command == constants.OOB_POWER_ON:
4699             node.powered = True
4700           elif self.op.command == constants.OOB_POWER_OFF:
4701             node.powered = False
4702           elif self.op.command == constants.OOB_POWER_STATUS:
4703             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4704             if powered != node.powered:
4705               logging.warning(("Recorded power state (%s) of node '%s' does not"
4706                                " match actual power state (%s)"), node.powered,
4707                               node.name, powered)
4708
4709           # For configuration changing commands we should update the node
4710           if self.op.command in (constants.OOB_POWER_ON,
4711                                  constants.OOB_POWER_OFF):
4712             self.cfg.Update(node, feedback_fn)
4713
4714           node_entry.append((constants.RS_NORMAL, result.payload))
4715
4716           if (self.op.command == constants.OOB_POWER_ON and
4717               idx < len(self.nodes) - 1):
4718             time.sleep(self.op.power_delay)
4719
4720     return ret
4721
4722   def _CheckPayload(self, result):
4723     """Checks if the payload is valid.
4724
4725     @param result: RPC result
4726     @raises errors.OpExecError: If payload is not valid
4727
4728     """
4729     errs = []
4730     if self.op.command == constants.OOB_HEALTH:
4731       if not isinstance(result.payload, list):
4732         errs.append("command 'health' is expected to return a list but got %s" %
4733                     type(result.payload))
4734       else:
4735         for item, status in result.payload:
4736           if status not in constants.OOB_STATUSES:
4737             errs.append("health item '%s' has invalid status '%s'" %
4738                         (item, status))
4739
4740     if self.op.command == constants.OOB_POWER_STATUS:
4741       if not isinstance(result.payload, dict):
4742         errs.append("power-status is expected to return a dict but got %s" %
4743                     type(result.payload))
4744
4745     if self.op.command in [
4746         constants.OOB_POWER_ON,
4747         constants.OOB_POWER_OFF,
4748         constants.OOB_POWER_CYCLE,
4749         ]:
4750       if result.payload is not None:
4751         errs.append("%s is expected to not return payload but got '%s'" %
4752                     (self.op.command, result.payload))
4753
4754     if errs:
4755       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4756                                utils.CommaJoin(errs))
4757
4758
4759 class _OsQuery(_QueryBase):
4760   FIELDS = query.OS_FIELDS
4761
4762   def ExpandNames(self, lu):
4763     # Lock all nodes in shared mode
4764     # Temporary removal of locks, should be reverted later
4765     # TODO: reintroduce locks when they are lighter-weight
4766     lu.needed_locks = {}
4767     #self.share_locks[locking.LEVEL_NODE] = 1
4768     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4769
4770     # The following variables interact with _QueryBase._GetNames
4771     if self.names:
4772       self.wanted = self.names
4773     else:
4774       self.wanted = locking.ALL_SET
4775
4776     self.do_locking = self.use_locking
4777
4778   def DeclareLocks(self, lu, level):
4779     pass
4780
4781   @staticmethod
4782   def _DiagnoseByOS(rlist):
4783     """Remaps a per-node return list into an a per-os per-node dictionary
4784
4785     @param rlist: a map with node names as keys and OS objects as values
4786
4787     @rtype: dict
4788     @return: a dictionary with osnames as keys and as value another
4789         map, with nodes as keys and tuples of (path, status, diagnose,
4790         variants, parameters, api_versions) as values, eg::
4791
4792           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4793                                      (/srv/..., False, "invalid api")],
4794                            "node2": [(/srv/..., True, "", [], [])]}
4795           }
4796
4797     """
4798     all_os = {}
4799     # we build here the list of nodes that didn't fail the RPC (at RPC
4800     # level), so that nodes with a non-responding node daemon don't
4801     # make all OSes invalid
4802     good_nodes = [node_name for node_name in rlist
4803                   if not rlist[node_name].fail_msg]
4804     for node_name, nr in rlist.items():
4805       if nr.fail_msg or not nr.payload:
4806         continue
4807       for (name, path, status, diagnose, variants,
4808            params, api_versions) in nr.payload:
4809         if name not in all_os:
4810           # build a list of nodes for this os containing empty lists
4811           # for each node in node_list
4812           all_os[name] = {}
4813           for nname in good_nodes:
4814             all_os[name][nname] = []
4815         # convert params from [name, help] to (name, help)
4816         params = [tuple(v) for v in params]
4817         all_os[name][node_name].append((path, status, diagnose,
4818                                         variants, params, api_versions))
4819     return all_os
4820
4821   def _GetQueryData(self, lu):
4822     """Computes the list of nodes and their attributes.
4823
4824     """
4825     # Locking is not used
4826     assert not (compat.any(lu.glm.is_owned(level)
4827                            for level in locking.LEVELS
4828                            if level != locking.LEVEL_CLUSTER) or
4829                 self.do_locking or self.use_locking)
4830
4831     valid_nodes = [node.name
4832                    for node in lu.cfg.GetAllNodesInfo().values()
4833                    if not node.offline and node.vm_capable]
4834     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4835     cluster = lu.cfg.GetClusterInfo()
4836
4837     data = {}
4838
4839     for (os_name, os_data) in pol.items():
4840       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4841                           hidden=(os_name in cluster.hidden_os),
4842                           blacklisted=(os_name in cluster.blacklisted_os))
4843
4844       variants = set()
4845       parameters = set()
4846       api_versions = set()
4847
4848       for idx, osl in enumerate(os_data.values()):
4849         info.valid = bool(info.valid and osl and osl[0][1])
4850         if not info.valid:
4851           break
4852
4853         (node_variants, node_params, node_api) = osl[0][3:6]
4854         if idx == 0:
4855           # First entry
4856           variants.update(node_variants)
4857           parameters.update(node_params)
4858           api_versions.update(node_api)
4859         else:
4860           # Filter out inconsistent values
4861           variants.intersection_update(node_variants)
4862           parameters.intersection_update(node_params)
4863           api_versions.intersection_update(node_api)
4864
4865       info.variants = list(variants)
4866       info.parameters = list(parameters)
4867       info.api_versions = list(api_versions)
4868
4869       data[os_name] = info
4870
4871     # Prepare data in requested order
4872     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4873             if name in data]
4874
4875
4876 class LUOsDiagnose(NoHooksLU):
4877   """Logical unit for OS diagnose/query.
4878
4879   """
4880   REQ_BGL = False
4881
4882   @staticmethod
4883   def _BuildFilter(fields, names):
4884     """Builds a filter for querying OSes.
4885
4886     """
4887     name_filter = qlang.MakeSimpleFilter("name", names)
4888
4889     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4890     # respective field is not requested
4891     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4892                      for fname in ["hidden", "blacklisted"]
4893                      if fname not in fields]
4894     if "valid" not in fields:
4895       status_filter.append([qlang.OP_TRUE, "valid"])
4896
4897     if status_filter:
4898       status_filter.insert(0, qlang.OP_AND)
4899     else:
4900       status_filter = None
4901
4902     if name_filter and status_filter:
4903       return [qlang.OP_AND, name_filter, status_filter]
4904     elif name_filter:
4905       return name_filter
4906     else:
4907       return status_filter
4908
4909   def CheckArguments(self):
4910     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4911                        self.op.output_fields, False)
4912
4913   def ExpandNames(self):
4914     self.oq.ExpandNames(self)
4915
4916   def Exec(self, feedback_fn):
4917     return self.oq.OldStyleQuery(self)
4918
4919
4920 class LUNodeRemove(LogicalUnit):
4921   """Logical unit for removing a node.
4922
4923   """
4924   HPATH = "node-remove"
4925   HTYPE = constants.HTYPE_NODE
4926
4927   def BuildHooksEnv(self):
4928     """Build hooks env.
4929
4930     """
4931     return {
4932       "OP_TARGET": self.op.node_name,
4933       "NODE_NAME": self.op.node_name,
4934       }
4935
4936   def BuildHooksNodes(self):
4937     """Build hooks nodes.
4938
4939     This doesn't run on the target node in the pre phase as a failed
4940     node would then be impossible to remove.
4941
4942     """
4943     all_nodes = self.cfg.GetNodeList()
4944     try:
4945       all_nodes.remove(self.op.node_name)
4946     except ValueError:
4947       pass
4948     return (all_nodes, all_nodes)
4949
4950   def CheckPrereq(self):
4951     """Check prerequisites.
4952
4953     This checks:
4954      - the node exists in the configuration
4955      - it does not have primary or secondary instances
4956      - it's not the master
4957
4958     Any errors are signaled by raising errors.OpPrereqError.
4959
4960     """
4961     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4962     node = self.cfg.GetNodeInfo(self.op.node_name)
4963     assert node is not None
4964
4965     masternode = self.cfg.GetMasterNode()
4966     if node.name == masternode:
4967       raise errors.OpPrereqError("Node is the master node, failover to another"
4968                                  " node is required", errors.ECODE_INVAL)
4969
4970     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4971       if node.name in instance.all_nodes:
4972         raise errors.OpPrereqError("Instance %s is still running on the node,"
4973                                    " please remove first" % instance_name,
4974                                    errors.ECODE_INVAL)
4975     self.op.node_name = node.name
4976     self.node = node
4977
4978   def Exec(self, feedback_fn):
4979     """Removes the node from the cluster.
4980
4981     """
4982     node = self.node
4983     logging.info("Stopping the node daemon and removing configs from node %s",
4984                  node.name)
4985
4986     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4987
4988     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4989       "Not owning BGL"
4990
4991     # Promote nodes to master candidate as needed
4992     _AdjustCandidatePool(self, exceptions=[node.name])
4993     self.context.RemoveNode(node.name)
4994
4995     # Run post hooks on the node before it's removed
4996     _RunPostHook(self, node.name)
4997
4998     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4999     msg = result.fail_msg
5000     if msg:
5001       self.LogWarning("Errors encountered on the remote node while leaving"
5002                       " the cluster: %s", msg)
5003
5004     # Remove node from our /etc/hosts
5005     if self.cfg.GetClusterInfo().modify_etc_hosts:
5006       master_node = self.cfg.GetMasterNode()
5007       result = self.rpc.call_etc_hosts_modify(master_node,
5008                                               constants.ETC_HOSTS_REMOVE,
5009                                               node.name, None)
5010       result.Raise("Can't update hosts file with new host data")
5011       _RedistributeAncillaryFiles(self)
5012
5013
5014 class _NodeQuery(_QueryBase):
5015   FIELDS = query.NODE_FIELDS
5016
5017   def ExpandNames(self, lu):
5018     lu.needed_locks = {}
5019     lu.share_locks = _ShareAll()
5020
5021     if self.names:
5022       self.wanted = _GetWantedNodes(lu, self.names)
5023     else:
5024       self.wanted = locking.ALL_SET
5025
5026     self.do_locking = (self.use_locking and
5027                        query.NQ_LIVE in self.requested_data)
5028
5029     if self.do_locking:
5030       # If any non-static field is requested we need to lock the nodes
5031       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5032
5033   def DeclareLocks(self, lu, level):
5034     pass
5035
5036   def _GetQueryData(self, lu):
5037     """Computes the list of nodes and their attributes.
5038
5039     """
5040     all_info = lu.cfg.GetAllNodesInfo()
5041
5042     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5043
5044     # Gather data as requested
5045     if query.NQ_LIVE in self.requested_data:
5046       # filter out non-vm_capable nodes
5047       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5048
5049       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5050                                         [lu.cfg.GetHypervisorType()])
5051       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5052                        for (name, nresult) in node_data.items()
5053                        if not nresult.fail_msg and nresult.payload)
5054     else:
5055       live_data = None
5056
5057     if query.NQ_INST in self.requested_data:
5058       node_to_primary = dict([(name, set()) for name in nodenames])
5059       node_to_secondary = dict([(name, set()) for name in nodenames])
5060
5061       inst_data = lu.cfg.GetAllInstancesInfo()
5062
5063       for inst in inst_data.values():
5064         if inst.primary_node in node_to_primary:
5065           node_to_primary[inst.primary_node].add(inst.name)
5066         for secnode in inst.secondary_nodes:
5067           if secnode in node_to_secondary:
5068             node_to_secondary[secnode].add(inst.name)
5069     else:
5070       node_to_primary = None
5071       node_to_secondary = None
5072
5073     if query.NQ_OOB in self.requested_data:
5074       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5075                          for name, node in all_info.iteritems())
5076     else:
5077       oob_support = None
5078
5079     if query.NQ_GROUP in self.requested_data:
5080       groups = lu.cfg.GetAllNodeGroupsInfo()
5081     else:
5082       groups = {}
5083
5084     return query.NodeQueryData([all_info[name] for name in nodenames],
5085                                live_data, lu.cfg.GetMasterNode(),
5086                                node_to_primary, node_to_secondary, groups,
5087                                oob_support, lu.cfg.GetClusterInfo())
5088
5089
5090 class LUNodeQuery(NoHooksLU):
5091   """Logical unit for querying nodes.
5092
5093   """
5094   # pylint: disable=W0142
5095   REQ_BGL = False
5096
5097   def CheckArguments(self):
5098     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5099                          self.op.output_fields, self.op.use_locking)
5100
5101   def ExpandNames(self):
5102     self.nq.ExpandNames(self)
5103
5104   def DeclareLocks(self, level):
5105     self.nq.DeclareLocks(self, level)
5106
5107   def Exec(self, feedback_fn):
5108     return self.nq.OldStyleQuery(self)
5109
5110
5111 class LUNodeQueryvols(NoHooksLU):
5112   """Logical unit for getting volumes on node(s).
5113
5114   """
5115   REQ_BGL = False
5116   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5117   _FIELDS_STATIC = utils.FieldSet("node")
5118
5119   def CheckArguments(self):
5120     _CheckOutputFields(static=self._FIELDS_STATIC,
5121                        dynamic=self._FIELDS_DYNAMIC,
5122                        selected=self.op.output_fields)
5123
5124   def ExpandNames(self):
5125     self.share_locks = _ShareAll()
5126     self.needed_locks = {}
5127
5128     if not self.op.nodes:
5129       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130     else:
5131       self.needed_locks[locking.LEVEL_NODE] = \
5132         _GetWantedNodes(self, self.op.nodes)
5133
5134   def Exec(self, feedback_fn):
5135     """Computes the list of nodes and their attributes.
5136
5137     """
5138     nodenames = self.owned_locks(locking.LEVEL_NODE)
5139     volumes = self.rpc.call_node_volumes(nodenames)
5140
5141     ilist = self.cfg.GetAllInstancesInfo()
5142     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5143
5144     output = []
5145     for node in nodenames:
5146       nresult = volumes[node]
5147       if nresult.offline:
5148         continue
5149       msg = nresult.fail_msg
5150       if msg:
5151         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5152         continue
5153
5154       node_vols = sorted(nresult.payload,
5155                          key=operator.itemgetter("dev"))
5156
5157       for vol in node_vols:
5158         node_output = []
5159         for field in self.op.output_fields:
5160           if field == "node":
5161             val = node
5162           elif field == "phys":
5163             val = vol["dev"]
5164           elif field == "vg":
5165             val = vol["vg"]
5166           elif field == "name":
5167             val = vol["name"]
5168           elif field == "size":
5169             val = int(float(vol["size"]))
5170           elif field == "instance":
5171             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5172           else:
5173             raise errors.ParameterError(field)
5174           node_output.append(str(val))
5175
5176         output.append(node_output)
5177
5178     return output
5179
5180
5181 class LUNodeQueryStorage(NoHooksLU):
5182   """Logical unit for getting information on storage units on node(s).
5183
5184   """
5185   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5186   REQ_BGL = False
5187
5188   def CheckArguments(self):
5189     _CheckOutputFields(static=self._FIELDS_STATIC,
5190                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5191                        selected=self.op.output_fields)
5192
5193   def ExpandNames(self):
5194     self.share_locks = _ShareAll()
5195     self.needed_locks = {}
5196
5197     if self.op.nodes:
5198       self.needed_locks[locking.LEVEL_NODE] = \
5199         _GetWantedNodes(self, self.op.nodes)
5200     else:
5201       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5202
5203   def Exec(self, feedback_fn):
5204     """Computes the list of nodes and their attributes.
5205
5206     """
5207     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5208
5209     # Always get name to sort by
5210     if constants.SF_NAME in self.op.output_fields:
5211       fields = self.op.output_fields[:]
5212     else:
5213       fields = [constants.SF_NAME] + self.op.output_fields
5214
5215     # Never ask for node or type as it's only known to the LU
5216     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5217       while extra in fields:
5218         fields.remove(extra)
5219
5220     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5221     name_idx = field_idx[constants.SF_NAME]
5222
5223     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5224     data = self.rpc.call_storage_list(self.nodes,
5225                                       self.op.storage_type, st_args,
5226                                       self.op.name, fields)
5227
5228     result = []
5229
5230     for node in utils.NiceSort(self.nodes):
5231       nresult = data[node]
5232       if nresult.offline:
5233         continue
5234
5235       msg = nresult.fail_msg
5236       if msg:
5237         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5238         continue
5239
5240       rows = dict([(row[name_idx], row) for row in nresult.payload])
5241
5242       for name in utils.NiceSort(rows.keys()):
5243         row = rows[name]
5244
5245         out = []
5246
5247         for field in self.op.output_fields:
5248           if field == constants.SF_NODE:
5249             val = node
5250           elif field == constants.SF_TYPE:
5251             val = self.op.storage_type
5252           elif field in field_idx:
5253             val = row[field_idx[field]]
5254           else:
5255             raise errors.ParameterError(field)
5256
5257           out.append(val)
5258
5259         result.append(out)
5260
5261     return result
5262
5263
5264 class _InstanceQuery(_QueryBase):
5265   FIELDS = query.INSTANCE_FIELDS
5266
5267   def ExpandNames(self, lu):
5268     lu.needed_locks = {}
5269     lu.share_locks = _ShareAll()
5270
5271     if self.names:
5272       self.wanted = _GetWantedInstances(lu, self.names)
5273     else:
5274       self.wanted = locking.ALL_SET
5275
5276     self.do_locking = (self.use_locking and
5277                        query.IQ_LIVE in self.requested_data)
5278     if self.do_locking:
5279       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5280       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5281       lu.needed_locks[locking.LEVEL_NODE] = []
5282       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5283
5284     self.do_grouplocks = (self.do_locking and
5285                           query.IQ_NODES in self.requested_data)
5286
5287   def DeclareLocks(self, lu, level):
5288     if self.do_locking:
5289       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5290         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5291
5292         # Lock all groups used by instances optimistically; this requires going
5293         # via the node before it's locked, requiring verification later on
5294         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295           set(group_uuid
5296               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5297               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5298       elif level == locking.LEVEL_NODE:
5299         lu._LockInstancesNodes() # pylint: disable=W0212
5300
5301   @staticmethod
5302   def _CheckGroupLocks(lu):
5303     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5304     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5305
5306     # Check if node groups for locked instances are still correct
5307     for instance_name in owned_instances:
5308       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5309
5310   def _GetQueryData(self, lu):
5311     """Computes the list of instances and their attributes.
5312
5313     """
5314     if self.do_grouplocks:
5315       self._CheckGroupLocks(lu)
5316
5317     cluster = lu.cfg.GetClusterInfo()
5318     all_info = lu.cfg.GetAllInstancesInfo()
5319
5320     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5321
5322     instance_list = [all_info[name] for name in instance_names]
5323     nodes = frozenset(itertools.chain(*(inst.all_nodes
5324                                         for inst in instance_list)))
5325     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5326     bad_nodes = []
5327     offline_nodes = []
5328     wrongnode_inst = set()
5329
5330     # Gather data as requested
5331     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332       live_data = {}
5333       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334       for name in nodes:
5335         result = node_data[name]
5336         if result.offline:
5337           # offline nodes will be in both lists
5338           assert result.fail_msg
5339           offline_nodes.append(name)
5340         if result.fail_msg:
5341           bad_nodes.append(name)
5342         elif result.payload:
5343           for inst in result.payload:
5344             if inst in all_info:
5345               if all_info[inst].primary_node == name:
5346                 live_data.update(result.payload)
5347               else:
5348                 wrongnode_inst.add(inst)
5349             else:
5350               # orphan instance; we don't list it here as we don't
5351               # handle this case yet in the output of instance listing
5352               logging.warning("Orphan instance '%s' found on node %s",
5353                               inst, name)
5354         # else no instance is alive
5355     else:
5356       live_data = {}
5357
5358     if query.IQ_DISKUSAGE in self.requested_data:
5359       disk_usage = dict((inst.name,
5360                          _ComputeDiskSize(inst.disk_template,
5361                                           [{constants.IDISK_SIZE: disk.size}
5362                                            for disk in inst.disks]))
5363                         for inst in instance_list)
5364     else:
5365       disk_usage = None
5366
5367     if query.IQ_CONSOLE in self.requested_data:
5368       consinfo = {}
5369       for inst in instance_list:
5370         if inst.name in live_data:
5371           # Instance is running
5372           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5373         else:
5374           consinfo[inst.name] = None
5375       assert set(consinfo.keys()) == set(instance_names)
5376     else:
5377       consinfo = None
5378
5379     if query.IQ_NODES in self.requested_data:
5380       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5381                                             instance_list)))
5382       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384                     for uuid in set(map(operator.attrgetter("group"),
5385                                         nodes.values())))
5386     else:
5387       nodes = None
5388       groups = None
5389
5390     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391                                    disk_usage, offline_nodes, bad_nodes,
5392                                    live_data, wrongnode_inst, consinfo,
5393                                    nodes, groups)
5394
5395
5396 class LUQuery(NoHooksLU):
5397   """Query for resources/items of a certain kind.
5398
5399   """
5400   # pylint: disable=W0142
5401   REQ_BGL = False
5402
5403   def CheckArguments(self):
5404     qcls = _GetQueryImplementation(self.op.what)
5405
5406     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5407
5408   def ExpandNames(self):
5409     self.impl.ExpandNames(self)
5410
5411   def DeclareLocks(self, level):
5412     self.impl.DeclareLocks(self, level)
5413
5414   def Exec(self, feedback_fn):
5415     return self.impl.NewStyleQuery(self)
5416
5417
5418 class LUQueryFields(NoHooksLU):
5419   """Query for resources/items of a certain kind.
5420
5421   """
5422   # pylint: disable=W0142
5423   REQ_BGL = False
5424
5425   def CheckArguments(self):
5426     self.qcls = _GetQueryImplementation(self.op.what)
5427
5428   def ExpandNames(self):
5429     self.needed_locks = {}
5430
5431   def Exec(self, feedback_fn):
5432     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5433
5434
5435 class LUNodeModifyStorage(NoHooksLU):
5436   """Logical unit for modifying a storage volume on a node.
5437
5438   """
5439   REQ_BGL = False
5440
5441   def CheckArguments(self):
5442     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5443
5444     storage_type = self.op.storage_type
5445
5446     try:
5447       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5448     except KeyError:
5449       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450                                  " modified" % storage_type,
5451                                  errors.ECODE_INVAL)
5452
5453     diff = set(self.op.changes.keys()) - modifiable
5454     if diff:
5455       raise errors.OpPrereqError("The following fields can not be modified for"
5456                                  " storage units of type '%s': %r" %
5457                                  (storage_type, list(diff)),
5458                                  errors.ECODE_INVAL)
5459
5460   def ExpandNames(self):
5461     self.needed_locks = {
5462       locking.LEVEL_NODE: self.op.node_name,
5463       }
5464
5465   def Exec(self, feedback_fn):
5466     """Computes the list of nodes and their attributes.
5467
5468     """
5469     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470     result = self.rpc.call_storage_modify(self.op.node_name,
5471                                           self.op.storage_type, st_args,
5472                                           self.op.name, self.op.changes)
5473     result.Raise("Failed to modify storage unit '%s' on %s" %
5474                  (self.op.name, self.op.node_name))
5475
5476
5477 class LUNodeAdd(LogicalUnit):
5478   """Logical unit for adding node to the cluster.
5479
5480   """
5481   HPATH = "node-add"
5482   HTYPE = constants.HTYPE_NODE
5483   _NFLAGS = ["master_capable", "vm_capable"]
5484
5485   def CheckArguments(self):
5486     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487     # validate/normalize the node name
5488     self.hostname = netutils.GetHostname(name=self.op.node_name,
5489                                          family=self.primary_ip_family)
5490     self.op.node_name = self.hostname.name
5491
5492     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493       raise errors.OpPrereqError("Cannot readd the master node",
5494                                  errors.ECODE_STATE)
5495
5496     if self.op.readd and self.op.group:
5497       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498                                  " being readded", errors.ECODE_INVAL)
5499
5500   def BuildHooksEnv(self):
5501     """Build hooks env.
5502
5503     This will run on all nodes before, and on all nodes + the new node after.
5504
5505     """
5506     return {
5507       "OP_TARGET": self.op.node_name,
5508       "NODE_NAME": self.op.node_name,
5509       "NODE_PIP": self.op.primary_ip,
5510       "NODE_SIP": self.op.secondary_ip,
5511       "MASTER_CAPABLE": str(self.op.master_capable),
5512       "VM_CAPABLE": str(self.op.vm_capable),
5513       }
5514
5515   def BuildHooksNodes(self):
5516     """Build hooks nodes.
5517
5518     """
5519     # Exclude added node
5520     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521     post_nodes = pre_nodes + [self.op.node_name, ]
5522
5523     return (pre_nodes, post_nodes)
5524
5525   def CheckPrereq(self):
5526     """Check prerequisites.
5527
5528     This checks:
5529      - the new node is not already in the config
5530      - it is resolvable
5531      - its parameters (single/dual homed) matches the cluster
5532
5533     Any errors are signaled by raising errors.OpPrereqError.
5534
5535     """
5536     cfg = self.cfg
5537     hostname = self.hostname
5538     node = hostname.name
5539     primary_ip = self.op.primary_ip = hostname.ip
5540     if self.op.secondary_ip is None:
5541       if self.primary_ip_family == netutils.IP6Address.family:
5542         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5543                                    " IPv4 address must be given as secondary",
5544                                    errors.ECODE_INVAL)
5545       self.op.secondary_ip = primary_ip
5546
5547     secondary_ip = self.op.secondary_ip
5548     if not netutils.IP4Address.IsValid(secondary_ip):
5549       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550                                  " address" % secondary_ip, errors.ECODE_INVAL)
5551
5552     node_list = cfg.GetNodeList()
5553     if not self.op.readd and node in node_list:
5554       raise errors.OpPrereqError("Node %s is already in the configuration" %
5555                                  node, errors.ECODE_EXISTS)
5556     elif self.op.readd and node not in node_list:
5557       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5558                                  errors.ECODE_NOENT)
5559
5560     self.changed_primary_ip = False
5561
5562     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563       if self.op.readd and node == existing_node_name:
5564         if existing_node.secondary_ip != secondary_ip:
5565           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566                                      " address configuration as before",
5567                                      errors.ECODE_INVAL)
5568         if existing_node.primary_ip != primary_ip:
5569           self.changed_primary_ip = True
5570
5571         continue
5572
5573       if (existing_node.primary_ip == primary_ip or
5574           existing_node.secondary_ip == primary_ip or
5575           existing_node.primary_ip == secondary_ip or
5576           existing_node.secondary_ip == secondary_ip):
5577         raise errors.OpPrereqError("New node ip address(es) conflict with"
5578                                    " existing node %s" % existing_node.name,
5579                                    errors.ECODE_NOTUNIQUE)
5580
5581     # After this 'if' block, None is no longer a valid value for the
5582     # _capable op attributes
5583     if self.op.readd:
5584       old_node = self.cfg.GetNodeInfo(node)
5585       assert old_node is not None, "Can't retrieve locked node %s" % node
5586       for attr in self._NFLAGS:
5587         if getattr(self.op, attr) is None:
5588           setattr(self.op, attr, getattr(old_node, attr))
5589     else:
5590       for attr in self._NFLAGS:
5591         if getattr(self.op, attr) is None:
5592           setattr(self.op, attr, True)
5593
5594     if self.op.readd and not self.op.vm_capable:
5595       pri, sec = cfg.GetNodeInstances(node)
5596       if pri or sec:
5597         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598                                    " flag set to false, but it already holds"
5599                                    " instances" % node,
5600                                    errors.ECODE_STATE)
5601
5602     # check that the type of the node (single versus dual homed) is the
5603     # same as for the master
5604     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605     master_singlehomed = myself.secondary_ip == myself.primary_ip
5606     newbie_singlehomed = secondary_ip == primary_ip
5607     if master_singlehomed != newbie_singlehomed:
5608       if master_singlehomed:
5609         raise errors.OpPrereqError("The master has no secondary ip but the"
5610                                    " new node has one",
5611                                    errors.ECODE_INVAL)
5612       else:
5613         raise errors.OpPrereqError("The master has a secondary ip but the"
5614                                    " new node doesn't have one",
5615                                    errors.ECODE_INVAL)
5616
5617     # checks reachability
5618     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619       raise errors.OpPrereqError("Node not reachable by ping",
5620                                  errors.ECODE_ENVIRON)
5621
5622     if not newbie_singlehomed:
5623       # check reachability from my secondary ip to newbie's secondary ip
5624       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625                            source=myself.secondary_ip):
5626         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627                                    " based ping to node daemon port",
5628                                    errors.ECODE_ENVIRON)
5629
5630     if self.op.readd:
5631       exceptions = [node]
5632     else:
5633       exceptions = []
5634
5635     if self.op.master_capable:
5636       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5637     else:
5638       self.master_candidate = False
5639
5640     if self.op.readd:
5641       self.new_node = old_node
5642     else:
5643       node_group = cfg.LookupNodeGroup(self.op.group)
5644       self.new_node = objects.Node(name=node,
5645                                    primary_ip=primary_ip,
5646                                    secondary_ip=secondary_ip,
5647                                    master_candidate=self.master_candidate,
5648                                    offline=False, drained=False,
5649                                    group=node_group)
5650
5651     if self.op.ndparams:
5652       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5653
5654     if self.op.hv_state:
5655       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5656
5657     if self.op.disk_state:
5658       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5659
5660     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661     #       it a property on the base class.
5662     result = rpc.DnsOnlyRunner().call_version([node])[node]
5663     result.Raise("Can't get version information from node %s" % node)
5664     if constants.PROTOCOL_VERSION == result.payload:
5665       logging.info("Communication to node %s fine, sw version %s match",
5666                    node, result.payload)
5667     else:
5668       raise errors.OpPrereqError("Version mismatch master version %s,"
5669                                  " node version %s" %
5670                                  (constants.PROTOCOL_VERSION, result.payload),
5671                                  errors.ECODE_ENVIRON)
5672
5673   def Exec(self, feedback_fn):
5674     """Adds the new node to the cluster.
5675
5676     """
5677     new_node = self.new_node
5678     node = new_node.name
5679
5680     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5681       "Not owning BGL"
5682
5683     # We adding a new node so we assume it's powered
5684     new_node.powered = True
5685
5686     # for re-adds, reset the offline/drained/master-candidate flags;
5687     # we need to reset here, otherwise offline would prevent RPC calls
5688     # later in the procedure; this also means that if the re-add
5689     # fails, we are left with a non-offlined, broken node
5690     if self.op.readd:
5691       new_node.drained = new_node.offline = False # pylint: disable=W0201
5692       self.LogInfo("Readding a node, the offline/drained flags were reset")
5693       # if we demote the node, we do cleanup later in the procedure
5694       new_node.master_candidate = self.master_candidate
5695       if self.changed_primary_ip:
5696         new_node.primary_ip = self.op.primary_ip
5697
5698     # copy the master/vm_capable flags
5699     for attr in self._NFLAGS:
5700       setattr(new_node, attr, getattr(self.op, attr))
5701
5702     # notify the user about any possible mc promotion
5703     if new_node.master_candidate:
5704       self.LogInfo("Node will be a master candidate")
5705
5706     if self.op.ndparams:
5707       new_node.ndparams = self.op.ndparams
5708     else:
5709       new_node.ndparams = {}
5710
5711     if self.op.hv_state:
5712       new_node.hv_state_static = self.new_hv_state
5713
5714     if self.op.disk_state:
5715       new_node.disk_state_static = self.new_disk_state
5716
5717     # Add node to our /etc/hosts, and add key to known_hosts
5718     if self.cfg.GetClusterInfo().modify_etc_hosts:
5719       master_node = self.cfg.GetMasterNode()
5720       result = self.rpc.call_etc_hosts_modify(master_node,
5721                                               constants.ETC_HOSTS_ADD,
5722                                               self.hostname.name,
5723                                               self.hostname.ip)
5724       result.Raise("Can't update hosts file with new host data")
5725
5726     if new_node.secondary_ip != new_node.primary_ip:
5727       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5728                                False)
5729
5730     node_verify_list = [self.cfg.GetMasterNode()]
5731     node_verify_param = {
5732       constants.NV_NODELIST: ([node], {}),
5733       # TODO: do a node-net-test as well?
5734     }
5735
5736     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737                                        self.cfg.GetClusterName())
5738     for verifier in node_verify_list:
5739       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741       if nl_payload:
5742         for failed in nl_payload:
5743           feedback_fn("ssh/hostname verification failed"
5744                       " (checking from %s): %s" %
5745                       (verifier, nl_payload[failed]))
5746         raise errors.OpExecError("ssh/hostname verification failed")
5747
5748     if self.op.readd:
5749       _RedistributeAncillaryFiles(self)
5750       self.context.ReaddNode(new_node)
5751       # make sure we redistribute the config
5752       self.cfg.Update(new_node, feedback_fn)
5753       # and make sure the new node will not have old files around
5754       if not new_node.master_candidate:
5755         result = self.rpc.call_node_demote_from_mc(new_node.name)
5756         msg = result.fail_msg
5757         if msg:
5758           self.LogWarning("Node failed to demote itself from master"
5759                           " candidate status: %s" % msg)
5760     else:
5761       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762                                   additional_vm=self.op.vm_capable)
5763       self.context.AddNode(new_node, self.proc.GetECId())
5764
5765
5766 class LUNodeSetParams(LogicalUnit):
5767   """Modifies the parameters of a node.
5768
5769   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770       to the node role (as _ROLE_*)
5771   @cvar _R2F: a dictionary from node role to tuples of flags
5772   @cvar _FLAGS: a list of attribute names corresponding to the flags
5773
5774   """
5775   HPATH = "node-modify"
5776   HTYPE = constants.HTYPE_NODE
5777   REQ_BGL = False
5778   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779   _F2R = {
5780     (True, False, False): _ROLE_CANDIDATE,
5781     (False, True, False): _ROLE_DRAINED,
5782     (False, False, True): _ROLE_OFFLINE,
5783     (False, False, False): _ROLE_REGULAR,
5784     }
5785   _R2F = dict((v, k) for k, v in _F2R.items())
5786   _FLAGS = ["master_candidate", "drained", "offline"]
5787
5788   def CheckArguments(self):
5789     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791                 self.op.master_capable, self.op.vm_capable,
5792                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793                 self.op.disk_state]
5794     if all_mods.count(None) == len(all_mods):
5795       raise errors.OpPrereqError("Please pass at least one modification",
5796                                  errors.ECODE_INVAL)
5797     if all_mods.count(True) > 1:
5798       raise errors.OpPrereqError("Can't set the node into more than one"
5799                                  " state at the same time",
5800                                  errors.ECODE_INVAL)
5801
5802     # Boolean value that tells us whether we might be demoting from MC
5803     self.might_demote = (self.op.master_candidate == False or
5804                          self.op.offline == True or
5805                          self.op.drained == True or
5806                          self.op.master_capable == False)
5807
5808     if self.op.secondary_ip:
5809       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811                                    " address" % self.op.secondary_ip,
5812                                    errors.ECODE_INVAL)
5813
5814     self.lock_all = self.op.auto_promote and self.might_demote
5815     self.lock_instances = self.op.secondary_ip is not None
5816
5817   def _InstanceFilter(self, instance):
5818     """Filter for getting affected instances.
5819
5820     """
5821     return (instance.disk_template in constants.DTS_INT_MIRROR and
5822             self.op.node_name in instance.all_nodes)
5823
5824   def ExpandNames(self):
5825     if self.lock_all:
5826       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827     else:
5828       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5829
5830     # Since modifying a node can have severe effects on currently running
5831     # operations the resource lock is at least acquired in shared mode
5832     self.needed_locks[locking.LEVEL_NODE_RES] = \
5833       self.needed_locks[locking.LEVEL_NODE]
5834
5835     # Get node resource and instance locks in shared mode; they are not used
5836     # for anything but read-only access
5837     self.share_locks[locking.LEVEL_NODE_RES] = 1
5838     self.share_locks[locking.LEVEL_INSTANCE] = 1
5839
5840     if self.lock_instances:
5841       self.needed_locks[locking.LEVEL_INSTANCE] = \
5842         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5843
5844   def BuildHooksEnv(self):
5845     """Build hooks env.
5846
5847     This runs on the master node.
5848
5849     """
5850     return {
5851       "OP_TARGET": self.op.node_name,
5852       "MASTER_CANDIDATE": str(self.op.master_candidate),
5853       "OFFLINE": str(self.op.offline),
5854       "DRAINED": str(self.op.drained),
5855       "MASTER_CAPABLE": str(self.op.master_capable),
5856       "VM_CAPABLE": str(self.op.vm_capable),
5857       }
5858
5859   def BuildHooksNodes(self):
5860     """Build hooks nodes.
5861
5862     """
5863     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864     return (nl, nl)
5865
5866   def CheckPrereq(self):
5867     """Check prerequisites.
5868
5869     This only checks the instance list against the existing names.
5870
5871     """
5872     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5873
5874     if self.lock_instances:
5875       affected_instances = \
5876         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5877
5878       # Verify instance locks
5879       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880       wanted_instances = frozenset(affected_instances.keys())
5881       if wanted_instances - owned_instances:
5882         raise errors.OpPrereqError("Instances affected by changing node %s's"
5883                                    " secondary IP address have changed since"
5884                                    " locks were acquired, wanted '%s', have"
5885                                    " '%s'; retry the operation" %
5886                                    (self.op.node_name,
5887                                     utils.CommaJoin(wanted_instances),
5888                                     utils.CommaJoin(owned_instances)),
5889                                    errors.ECODE_STATE)
5890     else:
5891       affected_instances = None
5892
5893     if (self.op.master_candidate is not None or
5894         self.op.drained is not None or
5895         self.op.offline is not None):
5896       # we can't change the master's node flags
5897       if self.op.node_name == self.cfg.GetMasterNode():
5898         raise errors.OpPrereqError("The master role can be changed"
5899                                    " only via master-failover",
5900                                    errors.ECODE_INVAL)
5901
5902     if self.op.master_candidate and not node.master_capable:
5903       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904                                  " it a master candidate" % node.name,
5905                                  errors.ECODE_STATE)
5906
5907     if self.op.vm_capable == False:
5908       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909       if ipri or isec:
5910         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911                                    " the vm_capable flag" % node.name,
5912                                    errors.ECODE_STATE)
5913
5914     if node.master_candidate and self.might_demote and not self.lock_all:
5915       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916       # check if after removing the current node, we're missing master
5917       # candidates
5918       (mc_remaining, mc_should, _) = \
5919           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920       if mc_remaining < mc_should:
5921         raise errors.OpPrereqError("Not enough master candidates, please"
5922                                    " pass auto promote option to allow"
5923                                    " promotion", errors.ECODE_STATE)
5924
5925     self.old_flags = old_flags = (node.master_candidate,
5926                                   node.drained, node.offline)
5927     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5928     self.old_role = old_role = self._F2R[old_flags]
5929
5930     # Check for ineffective changes
5931     for attr in self._FLAGS:
5932       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5933         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5934         setattr(self.op, attr, None)
5935
5936     # Past this point, any flag change to False means a transition
5937     # away from the respective state, as only real changes are kept
5938
5939     # TODO: We might query the real power state if it supports OOB
5940     if _SupportsOob(self.cfg, node):
5941       if self.op.offline is False and not (node.powered or
5942                                            self.op.powered == True):
5943         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5944                                     " offline status can be reset") %
5945                                    self.op.node_name)
5946     elif self.op.powered is not None:
5947       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5948                                   " as it does not support out-of-band"
5949                                   " handling") % self.op.node_name)
5950
5951     # If we're being deofflined/drained, we'll MC ourself if needed
5952     if (self.op.drained == False or self.op.offline == False or
5953         (self.op.master_capable and not node.master_capable)):
5954       if _DecideSelfPromotion(self):
5955         self.op.master_candidate = True
5956         self.LogInfo("Auto-promoting node to master candidate")
5957
5958     # If we're no longer master capable, we'll demote ourselves from MC
5959     if self.op.master_capable == False and node.master_candidate:
5960       self.LogInfo("Demoting from master candidate")
5961       self.op.master_candidate = False
5962
5963     # Compute new role
5964     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5965     if self.op.master_candidate:
5966       new_role = self._ROLE_CANDIDATE
5967     elif self.op.drained:
5968       new_role = self._ROLE_DRAINED
5969     elif self.op.offline:
5970       new_role = self._ROLE_OFFLINE
5971     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5972       # False is still in new flags, which means we're un-setting (the
5973       # only) True flag
5974       new_role = self._ROLE_REGULAR
5975     else: # no new flags, nothing, keep old role
5976       new_role = old_role
5977
5978     self.new_role = new_role
5979
5980     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5981       # Trying to transition out of offline status
5982       result = self.rpc.call_version([node.name])[node.name]
5983       if result.fail_msg:
5984         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5985                                    " to report its version: %s" %
5986                                    (node.name, result.fail_msg),
5987                                    errors.ECODE_STATE)
5988       else:
5989         self.LogWarning("Transitioning node from offline to online state"
5990                         " without using re-add. Please make sure the node"
5991                         " is healthy!")
5992
5993     if self.op.secondary_ip:
5994       # Ok even without locking, because this can't be changed by any LU
5995       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5996       master_singlehomed = master.secondary_ip == master.primary_ip
5997       if master_singlehomed and self.op.secondary_ip:
5998         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5999                                    " homed cluster", errors.ECODE_INVAL)
6000
6001       assert not (frozenset(affected_instances) -
6002                   self.owned_locks(locking.LEVEL_INSTANCE))
6003
6004       if node.offline:
6005         if affected_instances:
6006           raise errors.OpPrereqError("Cannot change secondary IP address:"
6007                                      " offline node has instances (%s)"
6008                                      " configured to use it" %
6009                                      utils.CommaJoin(affected_instances.keys()))
6010       else:
6011         # On online nodes, check that no instances are running, and that
6012         # the node has the new ip and we can reach it.
6013         for instance in affected_instances.values():
6014           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6015                               msg="cannot change secondary ip")
6016
6017         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6018         if master.name != node.name:
6019           # check reachability from master secondary ip to new secondary ip
6020           if not netutils.TcpPing(self.op.secondary_ip,
6021                                   constants.DEFAULT_NODED_PORT,
6022                                   source=master.secondary_ip):
6023             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6024                                        " based ping to node daemon port",
6025                                        errors.ECODE_ENVIRON)
6026
6027     if self.op.ndparams:
6028       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6029       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6030       self.new_ndparams = new_ndparams
6031
6032     if self.op.hv_state:
6033       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6034                                                  self.node.hv_state_static)
6035
6036     if self.op.disk_state:
6037       self.new_disk_state = \
6038         _MergeAndVerifyDiskState(self.op.disk_state,
6039                                  self.node.disk_state_static)
6040
6041   def Exec(self, feedback_fn):
6042     """Modifies a node.
6043
6044     """
6045     node = self.node
6046     old_role = self.old_role
6047     new_role = self.new_role
6048
6049     result = []
6050
6051     if self.op.ndparams:
6052       node.ndparams = self.new_ndparams
6053
6054     if self.op.powered is not None:
6055       node.powered = self.op.powered
6056
6057     if self.op.hv_state:
6058       node.hv_state_static = self.new_hv_state
6059
6060     if self.op.disk_state:
6061       node.disk_state_static = self.new_disk_state
6062
6063     for attr in ["master_capable", "vm_capable"]:
6064       val = getattr(self.op, attr)
6065       if val is not None:
6066         setattr(node, attr, val)
6067         result.append((attr, str(val)))
6068
6069     if new_role != old_role:
6070       # Tell the node to demote itself, if no longer MC and not offline
6071       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6072         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6073         if msg:
6074           self.LogWarning("Node failed to demote itself: %s", msg)
6075
6076       new_flags = self._R2F[new_role]
6077       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6078         if of != nf:
6079           result.append((desc, str(nf)))
6080       (node.master_candidate, node.drained, node.offline) = new_flags
6081
6082       # we locked all nodes, we adjust the CP before updating this node
6083       if self.lock_all:
6084         _AdjustCandidatePool(self, [node.name])
6085
6086     if self.op.secondary_ip:
6087       node.secondary_ip = self.op.secondary_ip
6088       result.append(("secondary_ip", self.op.secondary_ip))
6089
6090     # this will trigger configuration file update, if needed
6091     self.cfg.Update(node, feedback_fn)
6092
6093     # this will trigger job queue propagation or cleanup if the mc
6094     # flag changed
6095     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6096       self.context.ReaddNode(node)
6097
6098     return result
6099
6100
6101 class LUNodePowercycle(NoHooksLU):
6102   """Powercycles a node.
6103
6104   """
6105   REQ_BGL = False
6106
6107   def CheckArguments(self):
6108     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6109     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6110       raise errors.OpPrereqError("The node is the master and the force"
6111                                  " parameter was not set",
6112                                  errors.ECODE_INVAL)
6113
6114   def ExpandNames(self):
6115     """Locking for PowercycleNode.
6116
6117     This is a last-resort option and shouldn't block on other
6118     jobs. Therefore, we grab no locks.
6119
6120     """
6121     self.needed_locks = {}
6122
6123   def Exec(self, feedback_fn):
6124     """Reboots a node.
6125
6126     """
6127     result = self.rpc.call_node_powercycle(self.op.node_name,
6128                                            self.cfg.GetHypervisorType())
6129     result.Raise("Failed to schedule the reboot")
6130     return result.payload
6131
6132
6133 class LUClusterQuery(NoHooksLU):
6134   """Query cluster configuration.
6135
6136   """
6137   REQ_BGL = False
6138
6139   def ExpandNames(self):
6140     self.needed_locks = {}
6141
6142   def Exec(self, feedback_fn):
6143     """Return cluster config.
6144
6145     """
6146     cluster = self.cfg.GetClusterInfo()
6147     os_hvp = {}
6148
6149     # Filter just for enabled hypervisors
6150     for os_name, hv_dict in cluster.os_hvp.items():
6151       os_hvp[os_name] = {}
6152       for hv_name, hv_params in hv_dict.items():
6153         if hv_name in cluster.enabled_hypervisors:
6154           os_hvp[os_name][hv_name] = hv_params
6155
6156     # Convert ip_family to ip_version
6157     primary_ip_version = constants.IP4_VERSION
6158     if cluster.primary_ip_family == netutils.IP6Address.family:
6159       primary_ip_version = constants.IP6_VERSION
6160
6161     result = {
6162       "software_version": constants.RELEASE_VERSION,
6163       "protocol_version": constants.PROTOCOL_VERSION,
6164       "config_version": constants.CONFIG_VERSION,
6165       "os_api_version": max(constants.OS_API_VERSIONS),
6166       "export_version": constants.EXPORT_VERSION,
6167       "architecture": runtime.GetArchInfo(),
6168       "name": cluster.cluster_name,
6169       "master": cluster.master_node,
6170       "default_hypervisor": cluster.primary_hypervisor,
6171       "enabled_hypervisors": cluster.enabled_hypervisors,
6172       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6173                         for hypervisor_name in cluster.enabled_hypervisors]),
6174       "os_hvp": os_hvp,
6175       "beparams": cluster.beparams,
6176       "osparams": cluster.osparams,
6177       "ipolicy": cluster.ipolicy,
6178       "nicparams": cluster.nicparams,
6179       "ndparams": cluster.ndparams,
6180       "diskparams": cluster.diskparams,
6181       "candidate_pool_size": cluster.candidate_pool_size,
6182       "master_netdev": cluster.master_netdev,
6183       "master_netmask": cluster.master_netmask,
6184       "use_external_mip_script": cluster.use_external_mip_script,
6185       "volume_group_name": cluster.volume_group_name,
6186       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6187       "file_storage_dir": cluster.file_storage_dir,
6188       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6189       "maintain_node_health": cluster.maintain_node_health,
6190       "ctime": cluster.ctime,
6191       "mtime": cluster.mtime,
6192       "uuid": cluster.uuid,
6193       "tags": list(cluster.GetTags()),
6194       "uid_pool": cluster.uid_pool,
6195       "default_iallocator": cluster.default_iallocator,
6196       "reserved_lvs": cluster.reserved_lvs,
6197       "primary_ip_version": primary_ip_version,
6198       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6199       "hidden_os": cluster.hidden_os,
6200       "blacklisted_os": cluster.blacklisted_os,
6201       }
6202
6203     return result
6204
6205
6206 class LUClusterConfigQuery(NoHooksLU):
6207   """Return configuration values.
6208
6209   """
6210   REQ_BGL = False
6211
6212   def CheckArguments(self):
6213     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6214
6215   def ExpandNames(self):
6216     self.cq.ExpandNames(self)
6217
6218   def DeclareLocks(self, level):
6219     self.cq.DeclareLocks(self, level)
6220
6221   def Exec(self, feedback_fn):
6222     result = self.cq.OldStyleQuery(self)
6223
6224     assert len(result) == 1
6225
6226     return result[0]
6227
6228
6229 class _ClusterQuery(_QueryBase):
6230   FIELDS = query.CLUSTER_FIELDS
6231
6232   #: Do not sort (there is only one item)
6233   SORT_FIELD = None
6234
6235   def ExpandNames(self, lu):
6236     lu.needed_locks = {}
6237
6238     # The following variables interact with _QueryBase._GetNames
6239     self.wanted = locking.ALL_SET
6240     self.do_locking = self.use_locking
6241
6242     if self.do_locking:
6243       raise errors.OpPrereqError("Can not use locking for cluster queries",
6244                                  errors.ECODE_INVAL)
6245
6246   def DeclareLocks(self, lu, level):
6247     pass
6248
6249   def _GetQueryData(self, lu):
6250     """Computes the list of nodes and their attributes.
6251
6252     """
6253     # Locking is not used
6254     assert not (compat.any(lu.glm.is_owned(level)
6255                            for level in locking.LEVELS
6256                            if level != locking.LEVEL_CLUSTER) or
6257                 self.do_locking or self.use_locking)
6258
6259     if query.CQ_CONFIG in self.requested_data:
6260       cluster = lu.cfg.GetClusterInfo()
6261     else:
6262       cluster = NotImplemented
6263
6264     if query.CQ_QUEUE_DRAINED in self.requested_data:
6265       drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6266     else:
6267       drain_flag = NotImplemented
6268
6269     if query.CQ_WATCHER_PAUSE in self.requested_data:
6270       watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6271     else:
6272       watcher_pause = NotImplemented
6273
6274     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6275
6276
6277 class LUInstanceActivateDisks(NoHooksLU):
6278   """Bring up an instance's disks.
6279
6280   """
6281   REQ_BGL = False
6282
6283   def ExpandNames(self):
6284     self._ExpandAndLockInstance()
6285     self.needed_locks[locking.LEVEL_NODE] = []
6286     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6287
6288   def DeclareLocks(self, level):
6289     if level == locking.LEVEL_NODE:
6290       self._LockInstancesNodes()
6291
6292   def CheckPrereq(self):
6293     """Check prerequisites.
6294
6295     This checks that the instance is in the cluster.
6296
6297     """
6298     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6299     assert self.instance is not None, \
6300       "Cannot retrieve locked instance %s" % self.op.instance_name
6301     _CheckNodeOnline(self, self.instance.primary_node)
6302
6303   def Exec(self, feedback_fn):
6304     """Activate the disks.
6305
6306     """
6307     disks_ok, disks_info = \
6308               _AssembleInstanceDisks(self, self.instance,
6309                                      ignore_size=self.op.ignore_size)
6310     if not disks_ok:
6311       raise errors.OpExecError("Cannot activate block devices")
6312
6313     return disks_info
6314
6315
6316 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6317                            ignore_size=False):
6318   """Prepare the block devices for an instance.
6319
6320   This sets up the block devices on all nodes.
6321
6322   @type lu: L{LogicalUnit}
6323   @param lu: the logical unit on whose behalf we execute
6324   @type instance: L{objects.Instance}
6325   @param instance: the instance for whose disks we assemble
6326   @type disks: list of L{objects.Disk} or None
6327   @param disks: which disks to assemble (or all, if None)
6328   @type ignore_secondaries: boolean
6329   @param ignore_secondaries: if true, errors on secondary nodes
6330       won't result in an error return from the function
6331   @type ignore_size: boolean
6332   @param ignore_size: if true, the current known size of the disk
6333       will not be used during the disk activation, useful for cases
6334       when the size is wrong
6335   @return: False if the operation failed, otherwise a list of
6336       (host, instance_visible_name, node_visible_name)
6337       with the mapping from node devices to instance devices
6338
6339   """
6340   device_info = []
6341   disks_ok = True
6342   iname = instance.name
6343   disks = _ExpandCheckDisks(instance, disks)
6344
6345   # With the two passes mechanism we try to reduce the window of
6346   # opportunity for the race condition of switching DRBD to primary
6347   # before handshaking occured, but we do not eliminate it
6348
6349   # The proper fix would be to wait (with some limits) until the
6350   # connection has been made and drbd transitions from WFConnection
6351   # into any other network-connected state (Connected, SyncTarget,
6352   # SyncSource, etc.)
6353
6354   # 1st pass, assemble on all nodes in secondary mode
6355   for idx, inst_disk in enumerate(disks):
6356     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6357       if ignore_size:
6358         node_disk = node_disk.Copy()
6359         node_disk.UnsetSize()
6360       lu.cfg.SetDiskID(node_disk, node)
6361       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6362                                              False, idx)
6363       msg = result.fail_msg
6364       if msg:
6365         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6366                            " (is_primary=False, pass=1): %s",
6367                            inst_disk.iv_name, node, msg)
6368         if not ignore_secondaries:
6369           disks_ok = False
6370
6371   # FIXME: race condition on drbd migration to primary
6372
6373   # 2nd pass, do only the primary node
6374   for idx, inst_disk in enumerate(disks):
6375     dev_path = None
6376
6377     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6378       if node != instance.primary_node:
6379         continue
6380       if ignore_size:
6381         node_disk = node_disk.Copy()
6382         node_disk.UnsetSize()
6383       lu.cfg.SetDiskID(node_disk, node)
6384       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6385                                              True, idx)
6386       msg = result.fail_msg
6387       if msg:
6388         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6389                            " (is_primary=True, pass=2): %s",
6390                            inst_disk.iv_name, node, msg)
6391         disks_ok = False
6392       else:
6393         dev_path = result.payload
6394
6395     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6396
6397   # leave the disks configured for the primary node
6398   # this is a workaround that would be fixed better by
6399   # improving the logical/physical id handling
6400   for disk in disks:
6401     lu.cfg.SetDiskID(disk, instance.primary_node)
6402
6403   return disks_ok, device_info
6404
6405
6406 def _StartInstanceDisks(lu, instance, force):
6407   """Start the disks of an instance.
6408
6409   """
6410   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6411                                            ignore_secondaries=force)
6412   if not disks_ok:
6413     _ShutdownInstanceDisks(lu, instance)
6414     if force is not None and not force:
6415       lu.proc.LogWarning("", hint="If the message above refers to a"
6416                          " secondary node,"
6417                          " you can retry the operation using '--force'.")
6418     raise errors.OpExecError("Disk consistency error")
6419
6420
6421 class LUInstanceDeactivateDisks(NoHooksLU):
6422   """Shutdown an instance's disks.
6423
6424   """
6425   REQ_BGL = False
6426
6427   def ExpandNames(self):
6428     self._ExpandAndLockInstance()
6429     self.needed_locks[locking.LEVEL_NODE] = []
6430     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6431
6432   def DeclareLocks(self, level):
6433     if level == locking.LEVEL_NODE:
6434       self._LockInstancesNodes()
6435
6436   def CheckPrereq(self):
6437     """Check prerequisites.
6438
6439     This checks that the instance is in the cluster.
6440
6441     """
6442     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6443     assert self.instance is not None, \
6444       "Cannot retrieve locked instance %s" % self.op.instance_name
6445
6446   def Exec(self, feedback_fn):
6447     """Deactivate the disks
6448
6449     """
6450     instance = self.instance
6451     if self.op.force:
6452       _ShutdownInstanceDisks(self, instance)
6453     else:
6454       _SafeShutdownInstanceDisks(self, instance)
6455
6456
6457 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6458   """Shutdown block devices of an instance.
6459
6460   This function checks if an instance is running, before calling
6461   _ShutdownInstanceDisks.
6462
6463   """
6464   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6465   _ShutdownInstanceDisks(lu, instance, disks=disks)
6466
6467
6468 def _ExpandCheckDisks(instance, disks):
6469   """Return the instance disks selected by the disks list
6470
6471   @type disks: list of L{objects.Disk} or None
6472   @param disks: selected disks
6473   @rtype: list of L{objects.Disk}
6474   @return: selected instance disks to act on
6475
6476   """
6477   if disks is None:
6478     return instance.disks
6479   else:
6480     if not set(disks).issubset(instance.disks):
6481       raise errors.ProgrammerError("Can only act on disks belonging to the"
6482                                    " target instance")
6483     return disks
6484
6485
6486 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6487   """Shutdown block devices of an instance.
6488
6489   This does the shutdown on all nodes of the instance.
6490
6491   If the ignore_primary is false, errors on the primary node are
6492   ignored.
6493
6494   """
6495   all_result = True
6496   disks = _ExpandCheckDisks(instance, disks)
6497
6498   for disk in disks:
6499     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6500       lu.cfg.SetDiskID(top_disk, node)
6501       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6502       msg = result.fail_msg
6503       if msg:
6504         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6505                       disk.iv_name, node, msg)
6506         if ((node == instance.primary_node and not ignore_primary) or
6507             (node != instance.primary_node and not result.offline)):
6508           all_result = False
6509   return all_result
6510
6511
6512 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6513   """Checks if a node has enough free memory.
6514
6515   This function check if a given node has the needed amount of free
6516   memory. In case the node has less memory or we cannot get the
6517   information from the node, this function raise an OpPrereqError
6518   exception.
6519
6520   @type lu: C{LogicalUnit}
6521   @param lu: a logical unit from which we get configuration data
6522   @type node: C{str}
6523   @param node: the node to check
6524   @type reason: C{str}
6525   @param reason: string to use in the error message
6526   @type requested: C{int}
6527   @param requested: the amount of memory in MiB to check for
6528   @type hypervisor_name: C{str}
6529   @param hypervisor_name: the hypervisor to ask for memory stats
6530   @rtype: integer
6531   @return: node current free memory
6532   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6533       we cannot check the node
6534
6535   """
6536   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6537   nodeinfo[node].Raise("Can't get data from node %s" % node,
6538                        prereq=True, ecode=errors.ECODE_ENVIRON)
6539   (_, _, (hv_info, )) = nodeinfo[node].payload
6540
6541   free_mem = hv_info.get("memory_free", None)
6542   if not isinstance(free_mem, int):
6543     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6544                                " was '%s'" % (node, free_mem),
6545                                errors.ECODE_ENVIRON)
6546   if requested > free_mem:
6547     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6548                                " needed %s MiB, available %s MiB" %
6549                                (node, reason, requested, free_mem),
6550                                errors.ECODE_NORES)
6551   return free_mem
6552
6553
6554 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6555   """Checks if nodes have enough free disk space in the all VGs.
6556
6557   This function check if all given nodes have the needed amount of
6558   free disk. In case any node has less disk or we cannot get the
6559   information from the node, this function raise an OpPrereqError
6560   exception.
6561
6562   @type lu: C{LogicalUnit}
6563   @param lu: a logical unit from which we get configuration data
6564   @type nodenames: C{list}
6565   @param nodenames: the list of node names to check
6566   @type req_sizes: C{dict}
6567   @param req_sizes: the hash of vg and corresponding amount of disk in
6568       MiB to check for
6569   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6570       or we cannot check the node
6571
6572   """
6573   for vg, req_size in req_sizes.items():
6574     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6575
6576
6577 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6578   """Checks if nodes have enough free disk space in the specified VG.
6579
6580   This function check if all given nodes have the needed amount of
6581   free disk. In case any node has less disk or we cannot get the
6582   information from the node, this function raise an OpPrereqError
6583   exception.
6584
6585   @type lu: C{LogicalUnit}
6586   @param lu: a logical unit from which we get configuration data
6587   @type nodenames: C{list}
6588   @param nodenames: the list of node names to check
6589   @type vg: C{str}
6590   @param vg: the volume group to check
6591   @type requested: C{int}
6592   @param requested: the amount of disk in MiB to check for
6593   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6594       or we cannot check the node
6595
6596   """
6597   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6598   for node in nodenames:
6599     info = nodeinfo[node]
6600     info.Raise("Cannot get current information from node %s" % node,
6601                prereq=True, ecode=errors.ECODE_ENVIRON)
6602     (_, (vg_info, ), _) = info.payload
6603     vg_free = vg_info.get("vg_free", None)
6604     if not isinstance(vg_free, int):
6605       raise errors.OpPrereqError("Can't compute free disk space on node"
6606                                  " %s for vg %s, result was '%s'" %
6607                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6608     if requested > vg_free:
6609       raise errors.OpPrereqError("Not enough disk space on target node %s"
6610                                  " vg %s: required %d MiB, available %d MiB" %
6611                                  (node, vg, requested, vg_free),
6612                                  errors.ECODE_NORES)
6613
6614
6615 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6616   """Checks if nodes have enough physical CPUs
6617
6618   This function checks if all given nodes have the needed number of
6619   physical CPUs. In case any node has less CPUs or we cannot get the
6620   information from the node, this function raises an OpPrereqError
6621   exception.
6622
6623   @type lu: C{LogicalUnit}
6624   @param lu: a logical unit from which we get configuration data
6625   @type nodenames: C{list}
6626   @param nodenames: the list of node names to check
6627   @type requested: C{int}
6628   @param requested: the minimum acceptable number of physical CPUs
6629   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6630       or we cannot check the node
6631
6632   """
6633   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6634   for node in nodenames:
6635     info = nodeinfo[node]
6636     info.Raise("Cannot get current information from node %s" % node,
6637                prereq=True, ecode=errors.ECODE_ENVIRON)
6638     (_, _, (hv_info, )) = info.payload
6639     num_cpus = hv_info.get("cpu_total", None)
6640     if not isinstance(num_cpus, int):
6641       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6642                                  " on node %s, result was '%s'" %
6643                                  (node, num_cpus), errors.ECODE_ENVIRON)
6644     if requested > num_cpus:
6645       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6646                                  "required" % (node, num_cpus, requested),
6647                                  errors.ECODE_NORES)
6648
6649
6650 class LUInstanceStartup(LogicalUnit):
6651   """Starts an instance.
6652
6653   """
6654   HPATH = "instance-start"
6655   HTYPE = constants.HTYPE_INSTANCE
6656   REQ_BGL = False
6657
6658   def CheckArguments(self):
6659     # extra beparams
6660     if self.op.beparams:
6661       # fill the beparams dict
6662       objects.UpgradeBeParams(self.op.beparams)
6663       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6664
6665   def ExpandNames(self):
6666     self._ExpandAndLockInstance()
6667     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6668
6669   def DeclareLocks(self, level):
6670     if level == locking.LEVEL_NODE_RES:
6671       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6672
6673   def BuildHooksEnv(self):
6674     """Build hooks env.
6675
6676     This runs on master, primary and secondary nodes of the instance.
6677
6678     """
6679     env = {
6680       "FORCE": self.op.force,
6681       }
6682
6683     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6684
6685     return env
6686
6687   def BuildHooksNodes(self):
6688     """Build hooks nodes.
6689
6690     """
6691     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6692     return (nl, nl)
6693
6694   def CheckPrereq(self):
6695     """Check prerequisites.
6696
6697     This checks that the instance is in the cluster.
6698
6699     """
6700     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6701     assert self.instance is not None, \
6702       "Cannot retrieve locked instance %s" % self.op.instance_name
6703
6704     # extra hvparams
6705     if self.op.hvparams:
6706       # check hypervisor parameter syntax (locally)
6707       cluster = self.cfg.GetClusterInfo()
6708       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6709       filled_hvp = cluster.FillHV(instance)
6710       filled_hvp.update(self.op.hvparams)
6711       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6712       hv_type.CheckParameterSyntax(filled_hvp)
6713       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6714
6715     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6716
6717     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6718
6719     if self.primary_offline and self.op.ignore_offline_nodes:
6720       self.proc.LogWarning("Ignoring offline primary node")
6721
6722       if self.op.hvparams or self.op.beparams:
6723         self.proc.LogWarning("Overridden parameters are ignored")
6724     else:
6725       _CheckNodeOnline(self, instance.primary_node)
6726
6727       bep = self.cfg.GetClusterInfo().FillBE(instance)
6728       bep.update(self.op.beparams)
6729
6730       # check bridges existence
6731       _CheckInstanceBridgesExist(self, instance)
6732
6733       remote_info = self.rpc.call_instance_info(instance.primary_node,
6734                                                 instance.name,
6735                                                 instance.hypervisor)
6736       remote_info.Raise("Error checking node %s" % instance.primary_node,
6737                         prereq=True, ecode=errors.ECODE_ENVIRON)
6738       if not remote_info.payload: # not running already
6739         _CheckNodeFreeMemory(self, instance.primary_node,
6740                              "starting instance %s" % instance.name,
6741                              bep[constants.BE_MINMEM], instance.hypervisor)
6742
6743   def Exec(self, feedback_fn):
6744     """Start the instance.
6745
6746     """
6747     instance = self.instance
6748     force = self.op.force
6749
6750     if not self.op.no_remember:
6751       self.cfg.MarkInstanceUp(instance.name)
6752
6753     if self.primary_offline:
6754       assert self.op.ignore_offline_nodes
6755       self.proc.LogInfo("Primary node offline, marked instance as started")
6756     else:
6757       node_current = instance.primary_node
6758
6759       _StartInstanceDisks(self, instance, force)
6760
6761       result = \
6762         self.rpc.call_instance_start(node_current,
6763                                      (instance, self.op.hvparams,
6764                                       self.op.beparams),
6765                                      self.op.startup_paused)
6766       msg = result.fail_msg
6767       if msg:
6768         _ShutdownInstanceDisks(self, instance)
6769         raise errors.OpExecError("Could not start instance: %s" % msg)
6770
6771
6772 class LUInstanceReboot(LogicalUnit):
6773   """Reboot an instance.
6774
6775   """
6776   HPATH = "instance-reboot"
6777   HTYPE = constants.HTYPE_INSTANCE
6778   REQ_BGL = False
6779
6780   def ExpandNames(self):
6781     self._ExpandAndLockInstance()
6782
6783   def BuildHooksEnv(self):
6784     """Build hooks env.
6785
6786     This runs on master, primary and secondary nodes of the instance.
6787
6788     """
6789     env = {
6790       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6791       "REBOOT_TYPE": self.op.reboot_type,
6792       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6793       }
6794
6795     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6796
6797     return env
6798
6799   def BuildHooksNodes(self):
6800     """Build hooks nodes.
6801
6802     """
6803     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6804     return (nl, nl)
6805
6806   def CheckPrereq(self):
6807     """Check prerequisites.
6808
6809     This checks that the instance is in the cluster.
6810
6811     """
6812     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6813     assert self.instance is not None, \
6814       "Cannot retrieve locked instance %s" % self.op.instance_name
6815     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6816     _CheckNodeOnline(self, instance.primary_node)
6817
6818     # check bridges existence
6819     _CheckInstanceBridgesExist(self, instance)
6820
6821   def Exec(self, feedback_fn):
6822     """Reboot the instance.
6823
6824     """
6825     instance = self.instance
6826     ignore_secondaries = self.op.ignore_secondaries
6827     reboot_type = self.op.reboot_type
6828
6829     remote_info = self.rpc.call_instance_info(instance.primary_node,
6830                                               instance.name,
6831                                               instance.hypervisor)
6832     remote_info.Raise("Error checking node %s" % instance.primary_node)
6833     instance_running = bool(remote_info.payload)
6834
6835     node_current = instance.primary_node
6836
6837     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6838                                             constants.INSTANCE_REBOOT_HARD]:
6839       for disk in instance.disks:
6840         self.cfg.SetDiskID(disk, node_current)
6841       result = self.rpc.call_instance_reboot(node_current, instance,
6842                                              reboot_type,
6843                                              self.op.shutdown_timeout)
6844       result.Raise("Could not reboot instance")
6845     else:
6846       if instance_running:
6847         result = self.rpc.call_instance_shutdown(node_current, instance,
6848                                                  self.op.shutdown_timeout)
6849         result.Raise("Could not shutdown instance for full reboot")
6850         _ShutdownInstanceDisks(self, instance)
6851       else:
6852         self.LogInfo("Instance %s was already stopped, starting now",
6853                      instance.name)
6854       _StartInstanceDisks(self, instance, ignore_secondaries)
6855       result = self.rpc.call_instance_start(node_current,
6856                                             (instance, None, None), False)
6857       msg = result.fail_msg
6858       if msg:
6859         _ShutdownInstanceDisks(self, instance)
6860         raise errors.OpExecError("Could not start instance for"
6861                                  " full reboot: %s" % msg)
6862
6863     self.cfg.MarkInstanceUp(instance.name)
6864
6865
6866 class LUInstanceShutdown(LogicalUnit):
6867   """Shutdown an instance.
6868
6869   """
6870   HPATH = "instance-stop"
6871   HTYPE = constants.HTYPE_INSTANCE
6872   REQ_BGL = False
6873
6874   def ExpandNames(self):
6875     self._ExpandAndLockInstance()
6876
6877   def BuildHooksEnv(self):
6878     """Build hooks env.
6879
6880     This runs on master, primary and secondary nodes of the instance.
6881
6882     """
6883     env = _BuildInstanceHookEnvByObject(self, self.instance)
6884     env["TIMEOUT"] = self.op.timeout
6885     return env
6886
6887   def BuildHooksNodes(self):
6888     """Build hooks nodes.
6889
6890     """
6891     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6892     return (nl, nl)
6893
6894   def CheckPrereq(self):
6895     """Check prerequisites.
6896
6897     This checks that the instance is in the cluster.
6898
6899     """
6900     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901     assert self.instance is not None, \
6902       "Cannot retrieve locked instance %s" % self.op.instance_name
6903
6904     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6905
6906     self.primary_offline = \
6907       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6908
6909     if self.primary_offline and self.op.ignore_offline_nodes:
6910       self.proc.LogWarning("Ignoring offline primary node")
6911     else:
6912       _CheckNodeOnline(self, self.instance.primary_node)
6913
6914   def Exec(self, feedback_fn):
6915     """Shutdown the instance.
6916
6917     """
6918     instance = self.instance
6919     node_current = instance.primary_node
6920     timeout = self.op.timeout
6921
6922     if not self.op.no_remember:
6923       self.cfg.MarkInstanceDown(instance.name)
6924
6925     if self.primary_offline:
6926       assert self.op.ignore_offline_nodes
6927       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6928     else:
6929       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6930       msg = result.fail_msg
6931       if msg:
6932         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6933
6934       _ShutdownInstanceDisks(self, instance)
6935
6936
6937 class LUInstanceReinstall(LogicalUnit):
6938   """Reinstall an instance.
6939
6940   """
6941   HPATH = "instance-reinstall"
6942   HTYPE = constants.HTYPE_INSTANCE
6943   REQ_BGL = False
6944
6945   def ExpandNames(self):
6946     self._ExpandAndLockInstance()
6947
6948   def BuildHooksEnv(self):
6949     """Build hooks env.
6950
6951     This runs on master, primary and secondary nodes of the instance.
6952
6953     """
6954     return _BuildInstanceHookEnvByObject(self, self.instance)
6955
6956   def BuildHooksNodes(self):
6957     """Build hooks nodes.
6958
6959     """
6960     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6961     return (nl, nl)
6962
6963   def CheckPrereq(self):
6964     """Check prerequisites.
6965
6966     This checks that the instance is in the cluster and is not running.
6967
6968     """
6969     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6970     assert instance is not None, \
6971       "Cannot retrieve locked instance %s" % self.op.instance_name
6972     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6973                      " offline, cannot reinstall")
6974     for node in instance.secondary_nodes:
6975       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6976                        " cannot reinstall")
6977
6978     if instance.disk_template == constants.DT_DISKLESS:
6979       raise errors.OpPrereqError("Instance '%s' has no disks" %
6980                                  self.op.instance_name,
6981                                  errors.ECODE_INVAL)
6982     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6983
6984     if self.op.os_type is not None:
6985       # OS verification
6986       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6987       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6988       instance_os = self.op.os_type
6989     else:
6990       instance_os = instance.os
6991
6992     nodelist = list(instance.all_nodes)
6993
6994     if self.op.osparams:
6995       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6996       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6997       self.os_inst = i_osdict # the new dict (without defaults)
6998     else:
6999       self.os_inst = None
7000
7001     self.instance = instance
7002
7003   def Exec(self, feedback_fn):
7004     """Reinstall the instance.
7005
7006     """
7007     inst = self.instance
7008
7009     if self.op.os_type is not None:
7010       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7011       inst.os = self.op.os_type
7012       # Write to configuration
7013       self.cfg.Update(inst, feedback_fn)
7014
7015     _StartInstanceDisks(self, inst, None)
7016     try:
7017       feedback_fn("Running the instance OS create scripts...")
7018       # FIXME: pass debug option from opcode to backend
7019       result = self.rpc.call_instance_os_add(inst.primary_node,
7020                                              (inst, self.os_inst), True,
7021                                              self.op.debug_level)
7022       result.Raise("Could not install OS for instance %s on node %s" %
7023                    (inst.name, inst.primary_node))
7024     finally:
7025       _ShutdownInstanceDisks(self, inst)
7026
7027
7028 class LUInstanceRecreateDisks(LogicalUnit):
7029   """Recreate an instance's missing disks.
7030
7031   """
7032   HPATH = "instance-recreate-disks"
7033   HTYPE = constants.HTYPE_INSTANCE
7034   REQ_BGL = False
7035
7036   _MODIFYABLE = frozenset([
7037     constants.IDISK_SIZE,
7038     constants.IDISK_MODE,
7039     ])
7040
7041   # New or changed disk parameters may have different semantics
7042   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7043     constants.IDISK_ADOPT,
7044
7045     # TODO: Implement support changing VG while recreating
7046     constants.IDISK_VG,
7047     constants.IDISK_METAVG,
7048     ]))
7049
7050   def CheckArguments(self):
7051     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7052       # Normalize and convert deprecated list of disk indices
7053       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7054
7055     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7056     if duplicates:
7057       raise errors.OpPrereqError("Some disks have been specified more than"
7058                                  " once: %s" % utils.CommaJoin(duplicates),
7059                                  errors.ECODE_INVAL)
7060
7061     for (idx, params) in self.op.disks:
7062       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7063       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7064       if unsupported:
7065         raise errors.OpPrereqError("Parameters for disk %s try to change"
7066                                    " unmodifyable parameter(s): %s" %
7067                                    (idx, utils.CommaJoin(unsupported)),
7068                                    errors.ECODE_INVAL)
7069
7070   def ExpandNames(self):
7071     self._ExpandAndLockInstance()
7072     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7073     if self.op.nodes:
7074       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7075       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7076     else:
7077       self.needed_locks[locking.LEVEL_NODE] = []
7078     self.needed_locks[locking.LEVEL_NODE_RES] = []
7079
7080   def DeclareLocks(self, level):
7081     if level == locking.LEVEL_NODE:
7082       # if we replace the nodes, we only need to lock the old primary,
7083       # otherwise we need to lock all nodes for disk re-creation
7084       primary_only = bool(self.op.nodes)
7085       self._LockInstancesNodes(primary_only=primary_only)
7086     elif level == locking.LEVEL_NODE_RES:
7087       # Copy node locks
7088       self.needed_locks[locking.LEVEL_NODE_RES] = \
7089         self.needed_locks[locking.LEVEL_NODE][:]
7090
7091   def BuildHooksEnv(self):
7092     """Build hooks env.
7093
7094     This runs on master, primary and secondary nodes of the instance.
7095
7096     """
7097     return _BuildInstanceHookEnvByObject(self, self.instance)
7098
7099   def BuildHooksNodes(self):
7100     """Build hooks nodes.
7101
7102     """
7103     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7104     return (nl, nl)
7105
7106   def CheckPrereq(self):
7107     """Check prerequisites.
7108
7109     This checks that the instance is in the cluster and is not running.
7110
7111     """
7112     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7113     assert instance is not None, \
7114       "Cannot retrieve locked instance %s" % self.op.instance_name
7115     if self.op.nodes:
7116       if len(self.op.nodes) != len(instance.all_nodes):
7117         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7118                                    " %d replacement nodes were specified" %
7119                                    (instance.name, len(instance.all_nodes),
7120                                     len(self.op.nodes)),
7121                                    errors.ECODE_INVAL)
7122       assert instance.disk_template != constants.DT_DRBD8 or \
7123           len(self.op.nodes) == 2
7124       assert instance.disk_template != constants.DT_PLAIN or \
7125           len(self.op.nodes) == 1
7126       primary_node = self.op.nodes[0]
7127     else:
7128       primary_node = instance.primary_node
7129     _CheckNodeOnline(self, primary_node)
7130
7131     if instance.disk_template == constants.DT_DISKLESS:
7132       raise errors.OpPrereqError("Instance '%s' has no disks" %
7133                                  self.op.instance_name, errors.ECODE_INVAL)
7134
7135     # if we replace nodes *and* the old primary is offline, we don't
7136     # check
7137     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7138     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7139     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7140     if not (self.op.nodes and old_pnode.offline):
7141       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7142                           msg="cannot recreate disks")
7143
7144     if self.op.disks:
7145       self.disks = dict(self.op.disks)
7146     else:
7147       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7148
7149     maxidx = max(self.disks.keys())
7150     if maxidx >= len(instance.disks):
7151       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7152                                  errors.ECODE_INVAL)
7153
7154     if (self.op.nodes and
7155         sorted(self.disks.keys()) != range(len(instance.disks))):
7156       raise errors.OpPrereqError("Can't recreate disks partially and"
7157                                  " change the nodes at the same time",
7158                                  errors.ECODE_INVAL)
7159
7160     self.instance = instance
7161
7162   def Exec(self, feedback_fn):
7163     """Recreate the disks.
7164
7165     """
7166     instance = self.instance
7167
7168     assert (self.owned_locks(locking.LEVEL_NODE) ==
7169             self.owned_locks(locking.LEVEL_NODE_RES))
7170
7171     to_skip = []
7172     mods = [] # keeps track of needed changes
7173
7174     for idx, disk in enumerate(instance.disks):
7175       try:
7176         changes = self.disks[idx]
7177       except KeyError:
7178         # Disk should not be recreated
7179         to_skip.append(idx)
7180         continue
7181
7182       # update secondaries for disks, if needed
7183       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7184         # need to update the nodes and minors
7185         assert len(self.op.nodes) == 2
7186         assert len(disk.logical_id) == 6 # otherwise disk internals
7187                                          # have changed
7188         (_, _, old_port, _, _, old_secret) = disk.logical_id
7189         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7190         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7191                   new_minors[0], new_minors[1], old_secret)
7192         assert len(disk.logical_id) == len(new_id)
7193       else:
7194         new_id = None
7195
7196       mods.append((idx, new_id, changes))
7197
7198     # now that we have passed all asserts above, we can apply the mods
7199     # in a single run (to avoid partial changes)
7200     for idx, new_id, changes in mods:
7201       disk = instance.disks[idx]
7202       if new_id is not None:
7203         assert disk.dev_type == constants.LD_DRBD8
7204         disk.logical_id = new_id
7205       if changes:
7206         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7207                     mode=changes.get(constants.IDISK_MODE, None))
7208
7209     # change primary node, if needed
7210     if self.op.nodes:
7211       instance.primary_node = self.op.nodes[0]
7212       self.LogWarning("Changing the instance's nodes, you will have to"
7213                       " remove any disks left on the older nodes manually")
7214
7215     if self.op.nodes:
7216       self.cfg.Update(instance, feedback_fn)
7217
7218     _CreateDisks(self, instance, to_skip=to_skip)
7219
7220
7221 class LUInstanceRename(LogicalUnit):
7222   """Rename an instance.
7223
7224   """
7225   HPATH = "instance-rename"
7226   HTYPE = constants.HTYPE_INSTANCE
7227
7228   def CheckArguments(self):
7229     """Check arguments.
7230
7231     """
7232     if self.op.ip_check and not self.op.name_check:
7233       # TODO: make the ip check more flexible and not depend on the name check
7234       raise errors.OpPrereqError("IP address check requires a name check",
7235                                  errors.ECODE_INVAL)
7236
7237   def BuildHooksEnv(self):
7238     """Build hooks env.
7239
7240     This runs on master, primary and secondary nodes of the instance.
7241
7242     """
7243     env = _BuildInstanceHookEnvByObject(self, self.instance)
7244     env["INSTANCE_NEW_NAME"] = self.op.new_name
7245     return env
7246
7247   def BuildHooksNodes(self):
7248     """Build hooks nodes.
7249
7250     """
7251     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7252     return (nl, nl)
7253
7254   def CheckPrereq(self):
7255     """Check prerequisites.
7256
7257     This checks that the instance is in the cluster and is not running.
7258
7259     """
7260     self.op.instance_name = _ExpandInstanceName(self.cfg,
7261                                                 self.op.instance_name)
7262     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7263     assert instance is not None
7264     _CheckNodeOnline(self, instance.primary_node)
7265     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7266                         msg="cannot rename")
7267     self.instance = instance
7268
7269     new_name = self.op.new_name
7270     if self.op.name_check:
7271       hostname = netutils.GetHostname(name=new_name)
7272       if hostname.name != new_name:
7273         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7274                      hostname.name)
7275       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7276         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7277                                     " same as given hostname '%s'") %
7278                                     (hostname.name, self.op.new_name),
7279                                     errors.ECODE_INVAL)
7280       new_name = self.op.new_name = hostname.name
7281       if (self.op.ip_check and
7282           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7283         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7284                                    (hostname.ip, new_name),
7285                                    errors.ECODE_NOTUNIQUE)
7286
7287     instance_list = self.cfg.GetInstanceList()
7288     if new_name in instance_list and new_name != instance.name:
7289       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7290                                  new_name, errors.ECODE_EXISTS)
7291
7292   def Exec(self, feedback_fn):
7293     """Rename the instance.
7294
7295     """
7296     inst = self.instance
7297     old_name = inst.name
7298
7299     rename_file_storage = False
7300     if (inst.disk_template in constants.DTS_FILEBASED and
7301         self.op.new_name != inst.name):
7302       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7303       rename_file_storage = True
7304
7305     self.cfg.RenameInstance(inst.name, self.op.new_name)
7306     # Change the instance lock. This is definitely safe while we hold the BGL.
7307     # Otherwise the new lock would have to be added in acquired mode.
7308     assert self.REQ_BGL
7309     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7310     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7311
7312     # re-read the instance from the configuration after rename
7313     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7314
7315     if rename_file_storage:
7316       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7318                                                      old_file_storage_dir,
7319                                                      new_file_storage_dir)
7320       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7321                    " (but the instance has been renamed in Ganeti)" %
7322                    (inst.primary_node, old_file_storage_dir,
7323                     new_file_storage_dir))
7324
7325     _StartInstanceDisks(self, inst, None)
7326     try:
7327       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7328                                                  old_name, self.op.debug_level)
7329       msg = result.fail_msg
7330       if msg:
7331         msg = ("Could not run OS rename script for instance %s on node %s"
7332                " (but the instance has been renamed in Ganeti): %s" %
7333                (inst.name, inst.primary_node, msg))
7334         self.proc.LogWarning(msg)
7335     finally:
7336       _ShutdownInstanceDisks(self, inst)
7337
7338     return inst.name
7339
7340
7341 class LUInstanceRemove(LogicalUnit):
7342   """Remove an instance.
7343
7344   """
7345   HPATH = "instance-remove"
7346   HTYPE = constants.HTYPE_INSTANCE
7347   REQ_BGL = False
7348
7349   def ExpandNames(self):
7350     self._ExpandAndLockInstance()
7351     self.needed_locks[locking.LEVEL_NODE] = []
7352     self.needed_locks[locking.LEVEL_NODE_RES] = []
7353     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7354
7355   def DeclareLocks(self, level):
7356     if level == locking.LEVEL_NODE:
7357       self._LockInstancesNodes()
7358     elif level == locking.LEVEL_NODE_RES:
7359       # Copy node locks
7360       self.needed_locks[locking.LEVEL_NODE_RES] = \
7361         self.needed_locks[locking.LEVEL_NODE][:]
7362
7363   def BuildHooksEnv(self):
7364     """Build hooks env.
7365
7366     This runs on master, primary and secondary nodes of the instance.
7367
7368     """
7369     env = _BuildInstanceHookEnvByObject(self, self.instance)
7370     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7371     return env
7372
7373   def BuildHooksNodes(self):
7374     """Build hooks nodes.
7375
7376     """
7377     nl = [self.cfg.GetMasterNode()]
7378     nl_post = list(self.instance.all_nodes) + nl
7379     return (nl, nl_post)
7380
7381   def CheckPrereq(self):
7382     """Check prerequisites.
7383
7384     This checks that the instance is in the cluster.
7385
7386     """
7387     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7388     assert self.instance is not None, \
7389       "Cannot retrieve locked instance %s" % self.op.instance_name
7390
7391   def Exec(self, feedback_fn):
7392     """Remove the instance.
7393
7394     """
7395     instance = self.instance
7396     logging.info("Shutting down instance %s on node %s",
7397                  instance.name, instance.primary_node)
7398
7399     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7400                                              self.op.shutdown_timeout)
7401     msg = result.fail_msg
7402     if msg:
7403       if self.op.ignore_failures:
7404         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7405       else:
7406         raise errors.OpExecError("Could not shutdown instance %s on"
7407                                  " node %s: %s" %
7408                                  (instance.name, instance.primary_node, msg))
7409
7410     assert (self.owned_locks(locking.LEVEL_NODE) ==
7411             self.owned_locks(locking.LEVEL_NODE_RES))
7412     assert not (set(instance.all_nodes) -
7413                 self.owned_locks(locking.LEVEL_NODE)), \
7414       "Not owning correct locks"
7415
7416     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7417
7418
7419 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7420   """Utility function to remove an instance.
7421
7422   """
7423   logging.info("Removing block devices for instance %s", instance.name)
7424
7425   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7426     if not ignore_failures:
7427       raise errors.OpExecError("Can't remove instance's disks")
7428     feedback_fn("Warning: can't remove instance's disks")
7429
7430   logging.info("Removing instance %s out of cluster config", instance.name)
7431
7432   lu.cfg.RemoveInstance(instance.name)
7433
7434   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7435     "Instance lock removal conflict"
7436
7437   # Remove lock for the instance
7438   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7439
7440
7441 class LUInstanceQuery(NoHooksLU):
7442   """Logical unit for querying instances.
7443
7444   """
7445   # pylint: disable=W0142
7446   REQ_BGL = False
7447
7448   def CheckArguments(self):
7449     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7450                              self.op.output_fields, self.op.use_locking)
7451
7452   def ExpandNames(self):
7453     self.iq.ExpandNames(self)
7454
7455   def DeclareLocks(self, level):
7456     self.iq.DeclareLocks(self, level)
7457
7458   def Exec(self, feedback_fn):
7459     return self.iq.OldStyleQuery(self)
7460
7461
7462 class LUInstanceFailover(LogicalUnit):
7463   """Failover an instance.
7464
7465   """
7466   HPATH = "instance-failover"
7467   HTYPE = constants.HTYPE_INSTANCE
7468   REQ_BGL = False
7469
7470   def CheckArguments(self):
7471     """Check the arguments.
7472
7473     """
7474     self.iallocator = getattr(self.op, "iallocator", None)
7475     self.target_node = getattr(self.op, "target_node", None)
7476
7477   def ExpandNames(self):
7478     self._ExpandAndLockInstance()
7479
7480     if self.op.target_node is not None:
7481       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7482
7483     self.needed_locks[locking.LEVEL_NODE] = []
7484     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7485
7486     self.needed_locks[locking.LEVEL_NODE_RES] = []
7487     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7488
7489     ignore_consistency = self.op.ignore_consistency
7490     shutdown_timeout = self.op.shutdown_timeout
7491     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7492                                        cleanup=False,
7493                                        failover=True,
7494                                        ignore_consistency=ignore_consistency,
7495                                        shutdown_timeout=shutdown_timeout,
7496                                        ignore_ipolicy=self.op.ignore_ipolicy)
7497     self.tasklets = [self._migrater]
7498
7499   def DeclareLocks(self, level):
7500     if level == locking.LEVEL_NODE:
7501       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7502       if instance.disk_template in constants.DTS_EXT_MIRROR:
7503         if self.op.target_node is None:
7504           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7505         else:
7506           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7507                                                    self.op.target_node]
7508         del self.recalculate_locks[locking.LEVEL_NODE]
7509       else:
7510         self._LockInstancesNodes()
7511     elif level == locking.LEVEL_NODE_RES:
7512       # Copy node locks
7513       self.needed_locks[locking.LEVEL_NODE_RES] = \
7514         self.needed_locks[locking.LEVEL_NODE][:]
7515
7516   def BuildHooksEnv(self):
7517     """Build hooks env.
7518
7519     This runs on master, primary and secondary nodes of the instance.
7520
7521     """
7522     instance = self._migrater.instance
7523     source_node = instance.primary_node
7524     target_node = self.op.target_node
7525     env = {
7526       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7527       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7528       "OLD_PRIMARY": source_node,
7529       "NEW_PRIMARY": target_node,
7530       }
7531
7532     if instance.disk_template in constants.DTS_INT_MIRROR:
7533       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7534       env["NEW_SECONDARY"] = source_node
7535     else:
7536       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7537
7538     env.update(_BuildInstanceHookEnvByObject(self, instance))
7539
7540     return env
7541
7542   def BuildHooksNodes(self):
7543     """Build hooks nodes.
7544
7545     """
7546     instance = self._migrater.instance
7547     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7548     return (nl, nl + [instance.primary_node])
7549
7550
7551 class LUInstanceMigrate(LogicalUnit):
7552   """Migrate an instance.
7553
7554   This is migration without shutting down, compared to the failover,
7555   which is done with shutdown.
7556
7557   """
7558   HPATH = "instance-migrate"
7559   HTYPE = constants.HTYPE_INSTANCE
7560   REQ_BGL = False
7561
7562   def ExpandNames(self):
7563     self._ExpandAndLockInstance()
7564
7565     if self.op.target_node is not None:
7566       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7567
7568     self.needed_locks[locking.LEVEL_NODE] = []
7569     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7570
7571     self.needed_locks[locking.LEVEL_NODE] = []
7572     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7573
7574     self._migrater = \
7575       TLMigrateInstance(self, self.op.instance_name,
7576                         cleanup=self.op.cleanup,
7577                         failover=False,
7578                         fallback=self.op.allow_failover,
7579                         allow_runtime_changes=self.op.allow_runtime_changes,
7580                         ignore_ipolicy=self.op.ignore_ipolicy)
7581     self.tasklets = [self._migrater]
7582
7583   def DeclareLocks(self, level):
7584     if level == locking.LEVEL_NODE:
7585       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7586       if instance.disk_template in constants.DTS_EXT_MIRROR:
7587         if self.op.target_node is None:
7588           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7589         else:
7590           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7591                                                    self.op.target_node]
7592         del self.recalculate_locks[locking.LEVEL_NODE]
7593       else:
7594         self._LockInstancesNodes()
7595     elif level == locking.LEVEL_NODE_RES:
7596       # Copy node locks
7597       self.needed_locks[locking.LEVEL_NODE_RES] = \
7598         self.needed_locks[locking.LEVEL_NODE][:]
7599
7600   def BuildHooksEnv(self):
7601     """Build hooks env.
7602
7603     This runs on master, primary and secondary nodes of the instance.
7604
7605     """
7606     instance = self._migrater.instance
7607     source_node = instance.primary_node
7608     target_node = self.op.target_node
7609     env = _BuildInstanceHookEnvByObject(self, instance)
7610     env.update({
7611       "MIGRATE_LIVE": self._migrater.live,
7612       "MIGRATE_CLEANUP": self.op.cleanup,
7613       "OLD_PRIMARY": source_node,
7614       "NEW_PRIMARY": target_node,
7615       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7616       })
7617
7618     if instance.disk_template in constants.DTS_INT_MIRROR:
7619       env["OLD_SECONDARY"] = target_node
7620       env["NEW_SECONDARY"] = source_node
7621     else:
7622       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7623
7624     return env
7625
7626   def BuildHooksNodes(self):
7627     """Build hooks nodes.
7628
7629     """
7630     instance = self._migrater.instance
7631     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7632     return (nl, nl + [instance.primary_node])
7633
7634
7635 class LUInstanceMove(LogicalUnit):
7636   """Move an instance by data-copying.
7637
7638   """
7639   HPATH = "instance-move"
7640   HTYPE = constants.HTYPE_INSTANCE
7641   REQ_BGL = False
7642
7643   def ExpandNames(self):
7644     self._ExpandAndLockInstance()
7645     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7646     self.op.target_node = target_node
7647     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7648     self.needed_locks[locking.LEVEL_NODE_RES] = []
7649     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7650
7651   def DeclareLocks(self, level):
7652     if level == locking.LEVEL_NODE:
7653       self._LockInstancesNodes(primary_only=True)
7654     elif level == locking.LEVEL_NODE_RES:
7655       # Copy node locks
7656       self.needed_locks[locking.LEVEL_NODE_RES] = \
7657         self.needed_locks[locking.LEVEL_NODE][:]
7658
7659   def BuildHooksEnv(self):
7660     """Build hooks env.
7661
7662     This runs on master, primary and secondary nodes of the instance.
7663
7664     """
7665     env = {
7666       "TARGET_NODE": self.op.target_node,
7667       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7668       }
7669     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7670     return env
7671
7672   def BuildHooksNodes(self):
7673     """Build hooks nodes.
7674
7675     """
7676     nl = [
7677       self.cfg.GetMasterNode(),
7678       self.instance.primary_node,
7679       self.op.target_node,
7680       ]
7681     return (nl, nl)
7682
7683   def CheckPrereq(self):
7684     """Check prerequisites.
7685
7686     This checks that the instance is in the cluster.
7687
7688     """
7689     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7690     assert self.instance is not None, \
7691       "Cannot retrieve locked instance %s" % self.op.instance_name
7692
7693     node = self.cfg.GetNodeInfo(self.op.target_node)
7694     assert node is not None, \
7695       "Cannot retrieve locked node %s" % self.op.target_node
7696
7697     self.target_node = target_node = node.name
7698
7699     if target_node == instance.primary_node:
7700       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7701                                  (instance.name, target_node),
7702                                  errors.ECODE_STATE)
7703
7704     bep = self.cfg.GetClusterInfo().FillBE(instance)
7705
7706     for idx, dsk in enumerate(instance.disks):
7707       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7708         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7709                                    " cannot copy" % idx, errors.ECODE_STATE)
7710
7711     _CheckNodeOnline(self, target_node)
7712     _CheckNodeNotDrained(self, target_node)
7713     _CheckNodeVmCapable(self, target_node)
7714     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7715                                      self.cfg.GetNodeGroup(node.group))
7716     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7717                             ignore=self.op.ignore_ipolicy)
7718
7719     if instance.admin_state == constants.ADMINST_UP:
7720       # check memory requirements on the secondary node
7721       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7722                            instance.name, bep[constants.BE_MAXMEM],
7723                            instance.hypervisor)
7724     else:
7725       self.LogInfo("Not checking memory on the secondary node as"
7726                    " instance will not be started")
7727
7728     # check bridge existance
7729     _CheckInstanceBridgesExist(self, instance, node=target_node)
7730
7731   def Exec(self, feedback_fn):
7732     """Move an instance.
7733
7734     The move is done by shutting it down on its present node, copying
7735     the data over (slow) and starting it on the new node.
7736
7737     """
7738     instance = self.instance
7739
7740     source_node = instance.primary_node
7741     target_node = self.target_node
7742
7743     self.LogInfo("Shutting down instance %s on source node %s",
7744                  instance.name, source_node)
7745
7746     assert (self.owned_locks(locking.LEVEL_NODE) ==
7747             self.owned_locks(locking.LEVEL_NODE_RES))
7748
7749     result = self.rpc.call_instance_shutdown(source_node, instance,
7750                                              self.op.shutdown_timeout)
7751     msg = result.fail_msg
7752     if msg:
7753       if self.op.ignore_consistency:
7754         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7755                              " Proceeding anyway. Please make sure node"
7756                              " %s is down. Error details: %s",
7757                              instance.name, source_node, source_node, msg)
7758       else:
7759         raise errors.OpExecError("Could not shutdown instance %s on"
7760                                  " node %s: %s" %
7761                                  (instance.name, source_node, msg))
7762
7763     # create the target disks
7764     try:
7765       _CreateDisks(self, instance, target_node=target_node)
7766     except errors.OpExecError:
7767       self.LogWarning("Device creation failed, reverting...")
7768       try:
7769         _RemoveDisks(self, instance, target_node=target_node)
7770       finally:
7771         self.cfg.ReleaseDRBDMinors(instance.name)
7772         raise
7773
7774     cluster_name = self.cfg.GetClusterInfo().cluster_name
7775
7776     errs = []
7777     # activate, get path, copy the data over
7778     for idx, disk in enumerate(instance.disks):
7779       self.LogInfo("Copying data for disk %d", idx)
7780       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7781                                                instance.name, True, idx)
7782       if result.fail_msg:
7783         self.LogWarning("Can't assemble newly created disk %d: %s",
7784                         idx, result.fail_msg)
7785         errs.append(result.fail_msg)
7786         break
7787       dev_path = result.payload
7788       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7789                                              target_node, dev_path,
7790                                              cluster_name)
7791       if result.fail_msg:
7792         self.LogWarning("Can't copy data over for disk %d: %s",
7793                         idx, result.fail_msg)
7794         errs.append(result.fail_msg)
7795         break
7796
7797     if errs:
7798       self.LogWarning("Some disks failed to copy, aborting")
7799       try:
7800         _RemoveDisks(self, instance, target_node=target_node)
7801       finally:
7802         self.cfg.ReleaseDRBDMinors(instance.name)
7803         raise errors.OpExecError("Errors during disk copy: %s" %
7804                                  (",".join(errs),))
7805
7806     instance.primary_node = target_node
7807     self.cfg.Update(instance, feedback_fn)
7808
7809     self.LogInfo("Removing the disks on the original node")
7810     _RemoveDisks(self, instance, target_node=source_node)
7811
7812     # Only start the instance if it's marked as up
7813     if instance.admin_state == constants.ADMINST_UP:
7814       self.LogInfo("Starting instance %s on node %s",
7815                    instance.name, target_node)
7816
7817       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7818                                            ignore_secondaries=True)
7819       if not disks_ok:
7820         _ShutdownInstanceDisks(self, instance)
7821         raise errors.OpExecError("Can't activate the instance's disks")
7822
7823       result = self.rpc.call_instance_start(target_node,
7824                                             (instance, None, None), False)
7825       msg = result.fail_msg
7826       if msg:
7827         _ShutdownInstanceDisks(self, instance)
7828         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7829                                  (instance.name, target_node, msg))
7830
7831
7832 class LUNodeMigrate(LogicalUnit):
7833   """Migrate all instances from a node.
7834
7835   """
7836   HPATH = "node-migrate"
7837   HTYPE = constants.HTYPE_NODE
7838   REQ_BGL = False
7839
7840   def CheckArguments(self):
7841     pass
7842
7843   def ExpandNames(self):
7844     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7845
7846     self.share_locks = _ShareAll()
7847     self.needed_locks = {
7848       locking.LEVEL_NODE: [self.op.node_name],
7849       }
7850
7851   def BuildHooksEnv(self):
7852     """Build hooks env.
7853
7854     This runs on the master, the primary and all the secondaries.
7855
7856     """
7857     return {
7858       "NODE_NAME": self.op.node_name,
7859       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7860       }
7861
7862   def BuildHooksNodes(self):
7863     """Build hooks nodes.
7864
7865     """
7866     nl = [self.cfg.GetMasterNode()]
7867     return (nl, nl)
7868
7869   def CheckPrereq(self):
7870     pass
7871
7872   def Exec(self, feedback_fn):
7873     # Prepare jobs for migration instances
7874     allow_runtime_changes = self.op.allow_runtime_changes
7875     jobs = [
7876       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7877                                  mode=self.op.mode,
7878                                  live=self.op.live,
7879                                  iallocator=self.op.iallocator,
7880                                  target_node=self.op.target_node,
7881                                  allow_runtime_changes=allow_runtime_changes,
7882                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7883       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7884       ]
7885
7886     # TODO: Run iallocator in this opcode and pass correct placement options to
7887     # OpInstanceMigrate. Since other jobs can modify the cluster between
7888     # running the iallocator and the actual migration, a good consistency model
7889     # will have to be found.
7890
7891     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7892             frozenset([self.op.node_name]))
7893
7894     return ResultWithJobs(jobs)
7895
7896
7897 class TLMigrateInstance(Tasklet):
7898   """Tasklet class for instance migration.
7899
7900   @type live: boolean
7901   @ivar live: whether the migration will be done live or non-live;
7902       this variable is initalized only after CheckPrereq has run
7903   @type cleanup: boolean
7904   @ivar cleanup: Wheater we cleanup from a failed migration
7905   @type iallocator: string
7906   @ivar iallocator: The iallocator used to determine target_node
7907   @type target_node: string
7908   @ivar target_node: If given, the target_node to reallocate the instance to
7909   @type failover: boolean
7910   @ivar failover: Whether operation results in failover or migration
7911   @type fallback: boolean
7912   @ivar fallback: Whether fallback to failover is allowed if migration not
7913                   possible
7914   @type ignore_consistency: boolean
7915   @ivar ignore_consistency: Wheter we should ignore consistency between source
7916                             and target node
7917   @type shutdown_timeout: int
7918   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7919   @type ignore_ipolicy: bool
7920   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7921
7922   """
7923
7924   # Constants
7925   _MIGRATION_POLL_INTERVAL = 1      # seconds
7926   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7927
7928   def __init__(self, lu, instance_name, cleanup=False,
7929                failover=False, fallback=False,
7930                ignore_consistency=False,
7931                allow_runtime_changes=True,
7932                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7933                ignore_ipolicy=False):
7934     """Initializes this class.
7935
7936     """
7937     Tasklet.__init__(self, lu)
7938
7939     # Parameters
7940     self.instance_name = instance_name
7941     self.cleanup = cleanup
7942     self.live = False # will be overridden later
7943     self.failover = failover
7944     self.fallback = fallback
7945     self.ignore_consistency = ignore_consistency
7946     self.shutdown_timeout = shutdown_timeout
7947     self.ignore_ipolicy = ignore_ipolicy
7948     self.allow_runtime_changes = allow_runtime_changes
7949
7950   def CheckPrereq(self):
7951     """Check prerequisites.
7952
7953     This checks that the instance is in the cluster.
7954
7955     """
7956     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7957     instance = self.cfg.GetInstanceInfo(instance_name)
7958     assert instance is not None
7959     self.instance = instance
7960     cluster = self.cfg.GetClusterInfo()
7961
7962     if (not self.cleanup and
7963         not instance.admin_state == constants.ADMINST_UP and
7964         not self.failover and self.fallback):
7965       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7966                       " switching to failover")
7967       self.failover = True
7968
7969     if instance.disk_template not in constants.DTS_MIRRORED:
7970       if self.failover:
7971         text = "failovers"
7972       else:
7973         text = "migrations"
7974       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7975                                  " %s" % (instance.disk_template, text),
7976                                  errors.ECODE_STATE)
7977
7978     if instance.disk_template in constants.DTS_EXT_MIRROR:
7979       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7980
7981       if self.lu.op.iallocator:
7982         self._RunAllocator()
7983       else:
7984         # We set set self.target_node as it is required by
7985         # BuildHooksEnv
7986         self.target_node = self.lu.op.target_node
7987
7988       # Check that the target node is correct in terms of instance policy
7989       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7990       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7991       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7992       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7993                               ignore=self.ignore_ipolicy)
7994
7995       # self.target_node is already populated, either directly or by the
7996       # iallocator run
7997       target_node = self.target_node
7998       if self.target_node == instance.primary_node:
7999         raise errors.OpPrereqError("Cannot migrate instance %s"
8000                                    " to its primary (%s)" %
8001                                    (instance.name, instance.primary_node))
8002
8003       if len(self.lu.tasklets) == 1:
8004         # It is safe to release locks only when we're the only tasklet
8005         # in the LU
8006         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8007                       keep=[instance.primary_node, self.target_node])
8008
8009     else:
8010       secondary_nodes = instance.secondary_nodes
8011       if not secondary_nodes:
8012         raise errors.ConfigurationError("No secondary node but using"
8013                                         " %s disk template" %
8014                                         instance.disk_template)
8015       target_node = secondary_nodes[0]
8016       if self.lu.op.iallocator or (self.lu.op.target_node and
8017                                    self.lu.op.target_node != target_node):
8018         if self.failover:
8019           text = "failed over"
8020         else:
8021           text = "migrated"
8022         raise errors.OpPrereqError("Instances with disk template %s cannot"
8023                                    " be %s to arbitrary nodes"
8024                                    " (neither an iallocator nor a target"
8025                                    " node can be passed)" %
8026                                    (instance.disk_template, text),
8027                                    errors.ECODE_INVAL)
8028       nodeinfo = self.cfg.GetNodeInfo(target_node)
8029       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8030       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8031       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8032                               ignore=self.ignore_ipolicy)
8033
8034     i_be = cluster.FillBE(instance)
8035
8036     # check memory requirements on the secondary node
8037     if (not self.cleanup and
8038          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8039       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8040                                                "migrating instance %s" %
8041                                                instance.name,
8042                                                i_be[constants.BE_MINMEM],
8043                                                instance.hypervisor)
8044     else:
8045       self.lu.LogInfo("Not checking memory on the secondary node as"
8046                       " instance will not be started")
8047
8048     # check if failover must be forced instead of migration
8049     if (not self.cleanup and not self.failover and
8050         i_be[constants.BE_ALWAYS_FAILOVER]):
8051       if self.fallback:
8052         self.lu.LogInfo("Instance configured to always failover; fallback"
8053                         " to failover")
8054         self.failover = True
8055       else:
8056         raise errors.OpPrereqError("This instance has been configured to"
8057                                    " always failover, please allow failover",
8058                                    errors.ECODE_STATE)
8059
8060     # check bridge existance
8061     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8062
8063     if not self.cleanup:
8064       _CheckNodeNotDrained(self.lu, target_node)
8065       if not self.failover:
8066         result = self.rpc.call_instance_migratable(instance.primary_node,
8067                                                    instance)
8068         if result.fail_msg and self.fallback:
8069           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8070                           " failover")
8071           self.failover = True
8072         else:
8073           result.Raise("Can't migrate, please use failover",
8074                        prereq=True, ecode=errors.ECODE_STATE)
8075
8076     assert not (self.failover and self.cleanup)
8077
8078     if not self.failover:
8079       if self.lu.op.live is not None and self.lu.op.mode is not None:
8080         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8081                                    " parameters are accepted",
8082                                    errors.ECODE_INVAL)
8083       if self.lu.op.live is not None:
8084         if self.lu.op.live:
8085           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8086         else:
8087           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8088         # reset the 'live' parameter to None so that repeated
8089         # invocations of CheckPrereq do not raise an exception
8090         self.lu.op.live = None
8091       elif self.lu.op.mode is None:
8092         # read the default value from the hypervisor
8093         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8094         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8095
8096       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8097     else:
8098       # Failover is never live
8099       self.live = False
8100
8101     if not (self.failover or self.cleanup):
8102       remote_info = self.rpc.call_instance_info(instance.primary_node,
8103                                                 instance.name,
8104                                                 instance.hypervisor)
8105       remote_info.Raise("Error checking instance on node %s" %
8106                         instance.primary_node)
8107       instance_running = bool(remote_info.payload)
8108       if instance_running:
8109         self.current_mem = int(remote_info.payload["memory"])
8110
8111   def _RunAllocator(self):
8112     """Run the allocator based on input opcode.
8113
8114     """
8115     # FIXME: add a self.ignore_ipolicy option
8116     ial = IAllocator(self.cfg, self.rpc,
8117                      mode=constants.IALLOCATOR_MODE_RELOC,
8118                      name=self.instance_name,
8119                      relocate_from=[self.instance.primary_node],
8120                      )
8121
8122     ial.Run(self.lu.op.iallocator)
8123
8124     if not ial.success:
8125       raise errors.OpPrereqError("Can't compute nodes using"
8126                                  " iallocator '%s': %s" %
8127                                  (self.lu.op.iallocator, ial.info),
8128                                  errors.ECODE_NORES)
8129     if len(ial.result) != ial.required_nodes:
8130       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8131                                  " of nodes (%s), required %s" %
8132                                  (self.lu.op.iallocator, len(ial.result),
8133                                   ial.required_nodes), errors.ECODE_FAULT)
8134     self.target_node = ial.result[0]
8135     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8136                  self.instance_name, self.lu.op.iallocator,
8137                  utils.CommaJoin(ial.result))
8138
8139   def _WaitUntilSync(self):
8140     """Poll with custom rpc for disk sync.
8141
8142     This uses our own step-based rpc call.
8143
8144     """
8145     self.feedback_fn("* wait until resync is done")
8146     all_done = False
8147     while not all_done:
8148       all_done = True
8149       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8150                                             self.nodes_ip,
8151                                             (self.instance.disks,
8152                                              self.instance))
8153       min_percent = 100
8154       for node, nres in result.items():
8155         nres.Raise("Cannot resync disks on node %s" % node)
8156         node_done, node_percent = nres.payload
8157         all_done = all_done and node_done
8158         if node_percent is not None:
8159           min_percent = min(min_percent, node_percent)
8160       if not all_done:
8161         if min_percent < 100:
8162           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8163         time.sleep(2)
8164
8165   def _EnsureSecondary(self, node):
8166     """Demote a node to secondary.
8167
8168     """
8169     self.feedback_fn("* switching node %s to secondary mode" % node)
8170
8171     for dev in self.instance.disks:
8172       self.cfg.SetDiskID(dev, node)
8173
8174     result = self.rpc.call_blockdev_close(node, self.instance.name,
8175                                           self.instance.disks)
8176     result.Raise("Cannot change disk to secondary on node %s" % node)
8177
8178   def _GoStandalone(self):
8179     """Disconnect from the network.
8180
8181     """
8182     self.feedback_fn("* changing into standalone mode")
8183     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8184                                                self.instance.disks)
8185     for node, nres in result.items():
8186       nres.Raise("Cannot disconnect disks node %s" % node)
8187
8188   def _GoReconnect(self, multimaster):
8189     """Reconnect to the network.
8190
8191     """
8192     if multimaster:
8193       msg = "dual-master"
8194     else:
8195       msg = "single-master"
8196     self.feedback_fn("* changing disks into %s mode" % msg)
8197     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8198                                            (self.instance.disks, self.instance),
8199                                            self.instance.name, multimaster)
8200     for node, nres in result.items():
8201       nres.Raise("Cannot change disks config on node %s" % node)
8202
8203   def _ExecCleanup(self):
8204     """Try to cleanup after a failed migration.
8205
8206     The cleanup is done by:
8207       - check that the instance is running only on one node
8208         (and update the config if needed)
8209       - change disks on its secondary node to secondary
8210       - wait until disks are fully synchronized
8211       - disconnect from the network
8212       - change disks into single-master mode
8213       - wait again until disks are fully synchronized
8214
8215     """
8216     instance = self.instance
8217     target_node = self.target_node
8218     source_node = self.source_node
8219
8220     # check running on only one node
8221     self.feedback_fn("* checking where the instance actually runs"
8222                      " (if this hangs, the hypervisor might be in"
8223                      " a bad state)")
8224     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8225     for node, result in ins_l.items():
8226       result.Raise("Can't contact node %s" % node)
8227
8228     runningon_source = instance.name in ins_l[source_node].payload
8229     runningon_target = instance.name in ins_l[target_node].payload
8230
8231     if runningon_source and runningon_target:
8232       raise errors.OpExecError("Instance seems to be running on two nodes,"
8233                                " or the hypervisor is confused; you will have"
8234                                " to ensure manually that it runs only on one"
8235                                " and restart this operation")
8236
8237     if not (runningon_source or runningon_target):
8238       raise errors.OpExecError("Instance does not seem to be running at all;"
8239                                " in this case it's safer to repair by"
8240                                " running 'gnt-instance stop' to ensure disk"
8241                                " shutdown, and then restarting it")
8242
8243     if runningon_target:
8244       # the migration has actually succeeded, we need to update the config
8245       self.feedback_fn("* instance running on secondary node (%s),"
8246                        " updating config" % target_node)
8247       instance.primary_node = target_node
8248       self.cfg.Update(instance, self.feedback_fn)
8249       demoted_node = source_node
8250     else:
8251       self.feedback_fn("* instance confirmed to be running on its"
8252                        " primary node (%s)" % source_node)
8253       demoted_node = target_node
8254
8255     if instance.disk_template in constants.DTS_INT_MIRROR:
8256       self._EnsureSecondary(demoted_node)
8257       try:
8258         self._WaitUntilSync()
8259       except errors.OpExecError:
8260         # we ignore here errors, since if the device is standalone, it
8261         # won't be able to sync
8262         pass
8263       self._GoStandalone()
8264       self._GoReconnect(False)
8265       self._WaitUntilSync()
8266
8267     self.feedback_fn("* done")
8268
8269   def _RevertDiskStatus(self):
8270     """Try to revert the disk status after a failed migration.
8271
8272     """
8273     target_node = self.target_node
8274     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8275       return
8276
8277     try:
8278       self._EnsureSecondary(target_node)
8279       self._GoStandalone()
8280       self._GoReconnect(False)
8281       self._WaitUntilSync()
8282     except errors.OpExecError, err:
8283       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8284                          " please try to recover the instance manually;"
8285                          " error '%s'" % str(err))
8286
8287   def _AbortMigration(self):
8288     """Call the hypervisor code to abort a started migration.
8289
8290     """
8291     instance = self.instance
8292     target_node = self.target_node
8293     source_node = self.source_node
8294     migration_info = self.migration_info
8295
8296     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8297                                                                  instance,
8298                                                                  migration_info,
8299                                                                  False)
8300     abort_msg = abort_result.fail_msg
8301     if abort_msg:
8302       logging.error("Aborting migration failed on target node %s: %s",
8303                     target_node, abort_msg)
8304       # Don't raise an exception here, as we stil have to try to revert the
8305       # disk status, even if this step failed.
8306
8307     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8308         instance, False, self.live)
8309     abort_msg = abort_result.fail_msg
8310     if abort_msg:
8311       logging.error("Aborting migration failed on source node %s: %s",
8312                     source_node, abort_msg)
8313
8314   def _ExecMigration(self):
8315     """Migrate an instance.
8316
8317     The migrate is done by:
8318       - change the disks into dual-master mode
8319       - wait until disks are fully synchronized again
8320       - migrate the instance
8321       - change disks on the new secondary node (the old primary) to secondary
8322       - wait until disks are fully synchronized
8323       - change disks into single-master mode
8324
8325     """
8326     instance = self.instance
8327     target_node = self.target_node
8328     source_node = self.source_node
8329
8330     # Check for hypervisor version mismatch and warn the user.
8331     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8332                                        None, [self.instance.hypervisor])
8333     for ninfo in nodeinfo.values():
8334       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8335                   ninfo.node)
8336     (_, _, (src_info, )) = nodeinfo[source_node].payload
8337     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8338
8339     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8340         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8341       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8342       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8343       if src_version != dst_version:
8344         self.feedback_fn("* warning: hypervisor version mismatch between"
8345                          " source (%s) and target (%s) node" %
8346                          (src_version, dst_version))
8347
8348     self.feedback_fn("* checking disk consistency between source and target")
8349     for (idx, dev) in enumerate(instance.disks):
8350       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8351         raise errors.OpExecError("Disk %s is degraded or not fully"
8352                                  " synchronized on target node,"
8353                                  " aborting migration" % idx)
8354
8355     if self.current_mem > self.tgt_free_mem:
8356       if not self.allow_runtime_changes:
8357         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8358                                  " free memory to fit instance %s on target"
8359                                  " node %s (have %dMB, need %dMB)" %
8360                                  (instance.name, target_node,
8361                                   self.tgt_free_mem, self.current_mem))
8362       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8363       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8364                                                      instance,
8365                                                      self.tgt_free_mem)
8366       rpcres.Raise("Cannot modify instance runtime memory")
8367
8368     # First get the migration information from the remote node
8369     result = self.rpc.call_migration_info(source_node, instance)
8370     msg = result.fail_msg
8371     if msg:
8372       log_err = ("Failed fetching source migration information from %s: %s" %
8373                  (source_node, msg))
8374       logging.error(log_err)
8375       raise errors.OpExecError(log_err)
8376
8377     self.migration_info = migration_info = result.payload
8378
8379     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8380       # Then switch the disks to master/master mode
8381       self._EnsureSecondary(target_node)
8382       self._GoStandalone()
8383       self._GoReconnect(True)
8384       self._WaitUntilSync()
8385
8386     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8387     result = self.rpc.call_accept_instance(target_node,
8388                                            instance,
8389                                            migration_info,
8390                                            self.nodes_ip[target_node])
8391
8392     msg = result.fail_msg
8393     if msg:
8394       logging.error("Instance pre-migration failed, trying to revert"
8395                     " disk status: %s", msg)
8396       self.feedback_fn("Pre-migration failed, aborting")
8397       self._AbortMigration()
8398       self._RevertDiskStatus()
8399       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8400                                (instance.name, msg))
8401
8402     self.feedback_fn("* migrating instance to %s" % target_node)
8403     result = self.rpc.call_instance_migrate(source_node, instance,
8404                                             self.nodes_ip[target_node],
8405                                             self.live)
8406     msg = result.fail_msg
8407     if msg:
8408       logging.error("Instance migration failed, trying to revert"
8409                     " disk status: %s", msg)
8410       self.feedback_fn("Migration failed, aborting")
8411       self._AbortMigration()
8412       self._RevertDiskStatus()
8413       raise errors.OpExecError("Could not migrate instance %s: %s" %
8414                                (instance.name, msg))
8415
8416     self.feedback_fn("* starting memory transfer")
8417     last_feedback = time.time()
8418     while True:
8419       result = self.rpc.call_instance_get_migration_status(source_node,
8420                                                            instance)
8421       msg = result.fail_msg
8422       ms = result.payload   # MigrationStatus instance
8423       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8424         logging.error("Instance migration failed, trying to revert"
8425                       " disk status: %s", msg)
8426         self.feedback_fn("Migration failed, aborting")
8427         self._AbortMigration()
8428         self._RevertDiskStatus()
8429         raise errors.OpExecError("Could not migrate instance %s: %s" %
8430                                  (instance.name, msg))
8431
8432       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8433         self.feedback_fn("* memory transfer complete")
8434         break
8435
8436       if (utils.TimeoutExpired(last_feedback,
8437                                self._MIGRATION_FEEDBACK_INTERVAL) and
8438           ms.transferred_ram is not None):
8439         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8440         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8441         last_feedback = time.time()
8442
8443       time.sleep(self._MIGRATION_POLL_INTERVAL)
8444
8445     result = self.rpc.call_instance_finalize_migration_src(source_node,
8446                                                            instance,
8447                                                            True,
8448                                                            self.live)
8449     msg = result.fail_msg
8450     if msg:
8451       logging.error("Instance migration succeeded, but finalization failed"
8452                     " on the source node: %s", msg)
8453       raise errors.OpExecError("Could not finalize instance migration: %s" %
8454                                msg)
8455
8456     instance.primary_node = target_node
8457
8458     # distribute new instance config to the other nodes
8459     self.cfg.Update(instance, self.feedback_fn)
8460
8461     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8462                                                            instance,
8463                                                            migration_info,
8464                                                            True)
8465     msg = result.fail_msg
8466     if msg:
8467       logging.error("Instance migration succeeded, but finalization failed"
8468                     " on the target node: %s", msg)
8469       raise errors.OpExecError("Could not finalize instance migration: %s" %
8470                                msg)
8471
8472     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8473       self._EnsureSecondary(source_node)
8474       self._WaitUntilSync()
8475       self._GoStandalone()
8476       self._GoReconnect(False)
8477       self._WaitUntilSync()
8478
8479     # If the instance's disk template is `rbd' and there was a successful
8480     # migration, unmap the device from the source node.
8481     if self.instance.disk_template == constants.DT_RBD:
8482       disks = _ExpandCheckDisks(instance, instance.disks)
8483       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8484       for disk in disks:
8485         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8486         msg = result.fail_msg
8487         if msg:
8488           logging.error("Migration was successful, but couldn't unmap the"
8489                         " block device %s on source node %s: %s",
8490                         disk.iv_name, source_node, msg)
8491           logging.error("You need to unmap the device %s manually on %s",
8492                         disk.iv_name, source_node)
8493
8494     self.feedback_fn("* done")
8495
8496   def _ExecFailover(self):
8497     """Failover an instance.
8498
8499     The failover is done by shutting it down on its present node and
8500     starting it on the secondary.
8501
8502     """
8503     instance = self.instance
8504     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8505
8506     source_node = instance.primary_node
8507     target_node = self.target_node
8508
8509     if instance.admin_state == constants.ADMINST_UP:
8510       self.feedback_fn("* checking disk consistency between source and target")
8511       for (idx, dev) in enumerate(instance.disks):
8512         # for drbd, these are drbd over lvm
8513         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8514                                      False):
8515           if primary_node.offline:
8516             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8517                              " target node %s" %
8518                              (primary_node.name, idx, target_node))
8519           elif not self.ignore_consistency:
8520             raise errors.OpExecError("Disk %s is degraded on target node,"
8521                                      " aborting failover" % idx)
8522     else:
8523       self.feedback_fn("* not checking disk consistency as instance is not"
8524                        " running")
8525
8526     self.feedback_fn("* shutting down instance on source node")
8527     logging.info("Shutting down instance %s on node %s",
8528                  instance.name, source_node)
8529
8530     result = self.rpc.call_instance_shutdown(source_node, instance,
8531                                              self.shutdown_timeout)
8532     msg = result.fail_msg
8533     if msg:
8534       if self.ignore_consistency or primary_node.offline:
8535         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8536                            " proceeding anyway; please make sure node"
8537                            " %s is down; error details: %s",
8538                            instance.name, source_node, source_node, msg)
8539       else:
8540         raise errors.OpExecError("Could not shutdown instance %s on"
8541                                  " node %s: %s" %
8542                                  (instance.name, source_node, msg))
8543
8544     self.feedback_fn("* deactivating the instance's disks on source node")
8545     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8546       raise errors.OpExecError("Can't shut down the instance's disks")
8547
8548     instance.primary_node = target_node
8549     # distribute new instance config to the other nodes
8550     self.cfg.Update(instance, self.feedback_fn)
8551
8552     # Only start the instance if it's marked as up
8553     if instance.admin_state == constants.ADMINST_UP:
8554       self.feedback_fn("* activating the instance's disks on target node %s" %
8555                        target_node)
8556       logging.info("Starting instance %s on node %s",
8557                    instance.name, target_node)
8558
8559       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8560                                            ignore_secondaries=True)
8561       if not disks_ok:
8562         _ShutdownInstanceDisks(self.lu, instance)
8563         raise errors.OpExecError("Can't activate the instance's disks")
8564
8565       self.feedback_fn("* starting the instance on the target node %s" %
8566                        target_node)
8567       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8568                                             False)
8569       msg = result.fail_msg
8570       if msg:
8571         _ShutdownInstanceDisks(self.lu, instance)
8572         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8573                                  (instance.name, target_node, msg))
8574
8575   def Exec(self, feedback_fn):
8576     """Perform the migration.
8577
8578     """
8579     self.feedback_fn = feedback_fn
8580     self.source_node = self.instance.primary_node
8581
8582     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8583     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8584       self.target_node = self.instance.secondary_nodes[0]
8585       # Otherwise self.target_node has been populated either
8586       # directly, or through an iallocator.
8587
8588     self.all_nodes = [self.source_node, self.target_node]
8589     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8590                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8591
8592     if self.failover:
8593       feedback_fn("Failover instance %s" % self.instance.name)
8594       self._ExecFailover()
8595     else:
8596       feedback_fn("Migrating instance %s" % self.instance.name)
8597
8598       if self.cleanup:
8599         return self._ExecCleanup()
8600       else:
8601         return self._ExecMigration()
8602
8603
8604 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8605                     force_open):
8606   """Wrapper around L{_CreateBlockDevInner}.
8607
8608   This method annotates the root device first.
8609
8610   """
8611   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8612   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8613                               force_open)
8614
8615
8616 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8617                          info, force_open):
8618   """Create a tree of block devices on a given node.
8619
8620   If this device type has to be created on secondaries, create it and
8621   all its children.
8622
8623   If not, just recurse to children keeping the same 'force' value.
8624
8625   @attention: The device has to be annotated already.
8626
8627   @param lu: the lu on whose behalf we execute
8628   @param node: the node on which to create the device
8629   @type instance: L{objects.Instance}
8630   @param instance: the instance which owns the device
8631   @type device: L{objects.Disk}
8632   @param device: the device to create
8633   @type force_create: boolean
8634   @param force_create: whether to force creation of this device; this
8635       will be change to True whenever we find a device which has
8636       CreateOnSecondary() attribute
8637   @param info: the extra 'metadata' we should attach to the device
8638       (this will be represented as a LVM tag)
8639   @type force_open: boolean
8640   @param force_open: this parameter will be passes to the
8641       L{backend.BlockdevCreate} function where it specifies
8642       whether we run on primary or not, and it affects both
8643       the child assembly and the device own Open() execution
8644
8645   """
8646   if device.CreateOnSecondary():
8647     force_create = True
8648
8649   if device.children:
8650     for child in device.children:
8651       _CreateBlockDevInner(lu, node, instance, child, force_create,
8652                            info, force_open)
8653
8654   if not force_create:
8655     return
8656
8657   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8658
8659
8660 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8661   """Create a single block device on a given node.
8662
8663   This will not recurse over children of the device, so they must be
8664   created in advance.
8665
8666   @param lu: the lu on whose behalf we execute
8667   @param node: the node on which to create the device
8668   @type instance: L{objects.Instance}
8669   @param instance: the instance which owns the device
8670   @type device: L{objects.Disk}
8671   @param device: the device to create
8672   @param info: the extra 'metadata' we should attach to the device
8673       (this will be represented as a LVM tag)
8674   @type force_open: boolean
8675   @param force_open: this parameter will be passes to the
8676       L{backend.BlockdevCreate} function where it specifies
8677       whether we run on primary or not, and it affects both
8678       the child assembly and the device own Open() execution
8679
8680   """
8681   lu.cfg.SetDiskID(device, node)
8682   result = lu.rpc.call_blockdev_create(node, device, device.size,
8683                                        instance.name, force_open, info)
8684   result.Raise("Can't create block device %s on"
8685                " node %s for instance %s" % (device, node, instance.name))
8686   if device.physical_id is None:
8687     device.physical_id = result.payload
8688
8689
8690 def _GenerateUniqueNames(lu, exts):
8691   """Generate a suitable LV name.
8692
8693   This will generate a logical volume name for the given instance.
8694
8695   """
8696   results = []
8697   for val in exts:
8698     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8699     results.append("%s%s" % (new_id, val))
8700   return results
8701
8702
8703 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8704                          iv_name, p_minor, s_minor):
8705   """Generate a drbd8 device complete with its children.
8706
8707   """
8708   assert len(vgnames) == len(names) == 2
8709   port = lu.cfg.AllocatePort()
8710   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8711
8712   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8713                           logical_id=(vgnames[0], names[0]),
8714                           params={})
8715   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8716                           logical_id=(vgnames[1], names[1]),
8717                           params={})
8718   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8719                           logical_id=(primary, secondary, port,
8720                                       p_minor, s_minor,
8721                                       shared_secret),
8722                           children=[dev_data, dev_meta],
8723                           iv_name=iv_name, params={})
8724   return drbd_dev
8725
8726
8727 _DISK_TEMPLATE_NAME_PREFIX = {
8728   constants.DT_PLAIN: "",
8729   constants.DT_RBD: ".rbd",
8730   }
8731
8732
8733 _DISK_TEMPLATE_DEVICE_TYPE = {
8734   constants.DT_PLAIN: constants.LD_LV,
8735   constants.DT_FILE: constants.LD_FILE,
8736   constants.DT_SHARED_FILE: constants.LD_FILE,
8737   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8738   constants.DT_RBD: constants.LD_RBD,
8739   }
8740
8741
8742 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8743     secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8744     feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8745     _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8746   """Generate the entire disk layout for a given template type.
8747
8748   """
8749   #TODO: compute space requirements
8750
8751   vgname = lu.cfg.GetVGName()
8752   disk_count = len(disk_info)
8753   disks = []
8754
8755   if template_name == constants.DT_DISKLESS:
8756     pass
8757   elif template_name == constants.DT_DRBD8:
8758     if len(secondary_nodes) != 1:
8759       raise errors.ProgrammerError("Wrong template configuration")
8760     remote_node = secondary_nodes[0]
8761     minors = lu.cfg.AllocateDRBDMinor(
8762       [primary_node, remote_node] * len(disk_info), instance_name)
8763
8764     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8765                                                        full_disk_params)
8766     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8767
8768     names = []
8769     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8770                                                for i in range(disk_count)]):
8771       names.append(lv_prefix + "_data")
8772       names.append(lv_prefix + "_meta")
8773     for idx, disk in enumerate(disk_info):
8774       disk_index = idx + base_index
8775       data_vg = disk.get(constants.IDISK_VG, vgname)
8776       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8777       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8778                                       disk[constants.IDISK_SIZE],
8779                                       [data_vg, meta_vg],
8780                                       names[idx * 2:idx * 2 + 2],
8781                                       "disk/%d" % disk_index,
8782                                       minors[idx * 2], minors[idx * 2 + 1])
8783       disk_dev.mode = disk[constants.IDISK_MODE]
8784       disks.append(disk_dev)
8785   else:
8786     if secondary_nodes:
8787       raise errors.ProgrammerError("Wrong template configuration")
8788
8789     if template_name == constants.DT_FILE:
8790       _req_file_storage()
8791     elif template_name == constants.DT_SHARED_FILE:
8792       _req_shr_file_storage()
8793
8794     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8795     if name_prefix is None:
8796       names = None
8797     else:
8798       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8799                                         (name_prefix, base_index + i)
8800                                         for i in range(disk_count)])
8801
8802     if template_name == constants.DT_PLAIN:
8803       def logical_id_fn(idx, _, disk):
8804         vg = disk.get(constants.IDISK_VG, vgname)
8805         return (vg, names[idx])
8806     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8807       logical_id_fn = \
8808         lambda _, disk_index, disk: (file_driver,
8809                                      "%s/disk%d" % (file_storage_dir,
8810                                                     disk_index))
8811     elif template_name == constants.DT_BLOCK:
8812       logical_id_fn = \
8813         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8814                                        disk[constants.IDISK_ADOPT])
8815     elif template_name == constants.DT_RBD:
8816       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8817     else:
8818       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8819
8820     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8821
8822     for idx, disk in enumerate(disk_info):
8823       disk_index = idx + base_index
8824       size = disk[constants.IDISK_SIZE]
8825       feedback_fn("* disk %s, size %s" %
8826                   (disk_index, utils.FormatUnit(size, "h")))
8827       disks.append(objects.Disk(dev_type=dev_type, size=size,
8828                                 logical_id=logical_id_fn(idx, disk_index, disk),
8829                                 iv_name="disk/%d" % disk_index,
8830                                 mode=disk[constants.IDISK_MODE],
8831                                 params={}))
8832
8833   return disks
8834
8835
8836 def _GetInstanceInfoText(instance):
8837   """Compute that text that should be added to the disk's metadata.
8838
8839   """
8840   return "originstname+%s" % instance.name
8841
8842
8843 def _CalcEta(time_taken, written, total_size):
8844   """Calculates the ETA based on size written and total size.
8845
8846   @param time_taken: The time taken so far
8847   @param written: amount written so far
8848   @param total_size: The total size of data to be written
8849   @return: The remaining time in seconds
8850
8851   """
8852   avg_time = time_taken / float(written)
8853   return (total_size - written) * avg_time
8854
8855
8856 def _WipeDisks(lu, instance):
8857   """Wipes instance disks.
8858
8859   @type lu: L{LogicalUnit}
8860   @param lu: the logical unit on whose behalf we execute
8861   @type instance: L{objects.Instance}
8862   @param instance: the instance whose disks we should create
8863   @return: the success of the wipe
8864
8865   """
8866   node = instance.primary_node
8867
8868   for device in instance.disks:
8869     lu.cfg.SetDiskID(device, node)
8870
8871   logging.info("Pause sync of instance %s disks", instance.name)
8872   result = lu.rpc.call_blockdev_pause_resume_sync(node,
8873                                                   (instance.disks, instance),
8874                                                   True)
8875
8876   for idx, success in enumerate(result.payload):
8877     if not success:
8878       logging.warn("pause-sync of instance %s for disks %d failed",
8879                    instance.name, idx)
8880
8881   try:
8882     for idx, device in enumerate(instance.disks):
8883       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8884       # MAX_WIPE_CHUNK at max
8885       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8886                             constants.MIN_WIPE_CHUNK_PERCENT)
8887       # we _must_ make this an int, otherwise rounding errors will
8888       # occur
8889       wipe_chunk_size = int(wipe_chunk_size)
8890
8891       lu.LogInfo("* Wiping disk %d", idx)
8892       logging.info("Wiping disk %d for instance %s, node %s using"
8893                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8894
8895       offset = 0
8896       size = device.size
8897       last_output = 0
8898       start_time = time.time()
8899
8900       while offset < size:
8901         wipe_size = min(wipe_chunk_size, size - offset)
8902         logging.debug("Wiping disk %d, offset %s, chunk %s",
8903                       idx, offset, wipe_size)
8904         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8905                                            wipe_size)
8906         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8907                      (idx, offset, wipe_size))
8908         now = time.time()
8909         offset += wipe_size
8910         if now - last_output >= 60:
8911           eta = _CalcEta(now - start_time, offset, size)
8912           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8913                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8914           last_output = now
8915   finally:
8916     logging.info("Resume sync of instance %s disks", instance.name)
8917
8918     result = lu.rpc.call_blockdev_pause_resume_sync(node,
8919                                                     (instance.disks, instance),
8920                                                     False)
8921
8922     for idx, success in enumerate(result.payload):
8923       if not success:
8924         lu.LogWarning("Resume sync of disk %d failed, please have a"
8925                       " look at the status and troubleshoot the issue", idx)
8926         logging.warn("resume-sync of instance %s for disks %d failed",
8927                      instance.name, idx)
8928
8929
8930 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8931   """Create all disks for an instance.
8932
8933   This abstracts away some work from AddInstance.
8934
8935   @type lu: L{LogicalUnit}
8936   @param lu: the logical unit on whose behalf we execute
8937   @type instance: L{objects.Instance}
8938   @param instance: the instance whose disks we should create
8939   @type to_skip: list
8940   @param to_skip: list of indices to skip
8941   @type target_node: string
8942   @param target_node: if passed, overrides the target node for creation
8943   @rtype: boolean
8944   @return: the success of the creation
8945
8946   """
8947   info = _GetInstanceInfoText(instance)
8948   if target_node is None:
8949     pnode = instance.primary_node
8950     all_nodes = instance.all_nodes
8951   else:
8952     pnode = target_node
8953     all_nodes = [pnode]
8954
8955   if instance.disk_template in constants.DTS_FILEBASED:
8956     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8957     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8958
8959     result.Raise("Failed to create directory '%s' on"
8960                  " node %s" % (file_storage_dir, pnode))
8961
8962   # Note: this needs to be kept in sync with adding of disks in
8963   # LUInstanceSetParams
8964   for idx, device in enumerate(instance.disks):
8965     if to_skip and idx in to_skip:
8966       continue
8967     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8968     #HARDCODE
8969     for node in all_nodes:
8970       f_create = node == pnode
8971       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8972
8973
8974 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8975   """Remove all disks for an instance.
8976
8977   This abstracts away some work from `AddInstance()` and
8978   `RemoveInstance()`. Note that in case some of the devices couldn't
8979   be removed, the removal will continue with the other ones (compare
8980   with `_CreateDisks()`).
8981
8982   @type lu: L{LogicalUnit}
8983   @param lu: the logical unit on whose behalf we execute
8984   @type instance: L{objects.Instance}
8985   @param instance: the instance whose disks we should remove
8986   @type target_node: string
8987   @param target_node: used to override the node on which to remove the disks
8988   @rtype: boolean
8989   @return: the success of the removal
8990
8991   """
8992   logging.info("Removing block devices for instance %s", instance.name)
8993
8994   all_result = True
8995   ports_to_release = set()
8996   for (idx, device) in enumerate(instance.disks):
8997     if target_node:
8998       edata = [(target_node, device)]
8999     else:
9000       edata = device.ComputeNodeTree(instance.primary_node)
9001     for node, disk in edata:
9002       lu.cfg.SetDiskID(disk, node)
9003       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9004       if msg:
9005         lu.LogWarning("Could not remove disk %s on node %s,"
9006                       " continuing anyway: %s", idx, node, msg)
9007         all_result = False
9008
9009     # if this is a DRBD disk, return its port to the pool
9010     if device.dev_type in constants.LDS_DRBD:
9011       ports_to_release.add(device.logical_id[2])
9012
9013   if all_result or ignore_failures:
9014     for port in ports_to_release:
9015       lu.cfg.AddTcpUdpPort(port)
9016
9017   if instance.disk_template == constants.DT_FILE:
9018     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9019     if target_node:
9020       tgt = target_node
9021     else:
9022       tgt = instance.primary_node
9023     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9024     if result.fail_msg:
9025       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9026                     file_storage_dir, instance.primary_node, result.fail_msg)
9027       all_result = False
9028
9029   return all_result
9030
9031
9032 def _ComputeDiskSizePerVG(disk_template, disks):
9033   """Compute disk size requirements in the volume group
9034
9035   """
9036   def _compute(disks, payload):
9037     """Universal algorithm.
9038
9039     """
9040     vgs = {}
9041     for disk in disks:
9042       vgs[disk[constants.IDISK_VG]] = \
9043         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9044
9045     return vgs
9046
9047   # Required free disk space as a function of disk and swap space
9048   req_size_dict = {
9049     constants.DT_DISKLESS: {},
9050     constants.DT_PLAIN: _compute(disks, 0),
9051     # 128 MB are added for drbd metadata for each disk
9052     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9053     constants.DT_FILE: {},
9054     constants.DT_SHARED_FILE: {},
9055   }
9056
9057   if disk_template not in req_size_dict:
9058     raise errors.ProgrammerError("Disk template '%s' size requirement"
9059                                  " is unknown" % disk_template)
9060
9061   return req_size_dict[disk_template]
9062
9063
9064 def _ComputeDiskSize(disk_template, disks):
9065   """Compute disk size requirements in the volume group
9066
9067   """
9068   # Required free disk space as a function of disk and swap space
9069   req_size_dict = {
9070     constants.DT_DISKLESS: None,
9071     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9072     # 128 MB are added for drbd metadata for each disk
9073     constants.DT_DRBD8:
9074       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9075     constants.DT_FILE: None,
9076     constants.DT_SHARED_FILE: 0,
9077     constants.DT_BLOCK: 0,
9078     constants.DT_RBD: 0,
9079   }
9080
9081   if disk_template not in req_size_dict:
9082     raise errors.ProgrammerError("Disk template '%s' size requirement"
9083                                  " is unknown" % disk_template)
9084
9085   return req_size_dict[disk_template]
9086
9087
9088 def _FilterVmNodes(lu, nodenames):
9089   """Filters out non-vm_capable nodes from a list.
9090
9091   @type lu: L{LogicalUnit}
9092   @param lu: the logical unit for which we check
9093   @type nodenames: list
9094   @param nodenames: the list of nodes on which we should check
9095   @rtype: list
9096   @return: the list of vm-capable nodes
9097
9098   """
9099   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9100   return [name for name in nodenames if name not in vm_nodes]
9101
9102
9103 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9104   """Hypervisor parameter validation.
9105
9106   This function abstract the hypervisor parameter validation to be
9107   used in both instance create and instance modify.
9108
9109   @type lu: L{LogicalUnit}
9110   @param lu: the logical unit for which we check
9111   @type nodenames: list
9112   @param nodenames: the list of nodes on which we should check
9113   @type hvname: string
9114   @param hvname: the name of the hypervisor we should use
9115   @type hvparams: dict
9116   @param hvparams: the parameters which we need to check
9117   @raise errors.OpPrereqError: if the parameters are not valid
9118
9119   """
9120   nodenames = _FilterVmNodes(lu, nodenames)
9121
9122   cluster = lu.cfg.GetClusterInfo()
9123   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9124
9125   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9126   for node in nodenames:
9127     info = hvinfo[node]
9128     if info.offline:
9129       continue
9130     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9131
9132
9133 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9134   """OS parameters validation.
9135
9136   @type lu: L{LogicalUnit}
9137   @param lu: the logical unit for which we check
9138   @type required: boolean
9139   @param required: whether the validation should fail if the OS is not
9140       found
9141   @type nodenames: list
9142   @param nodenames: the list of nodes on which we should check
9143   @type osname: string
9144   @param osname: the name of the hypervisor we should use
9145   @type osparams: dict
9146   @param osparams: the parameters which we need to check
9147   @raise errors.OpPrereqError: if the parameters are not valid
9148
9149   """
9150   nodenames = _FilterVmNodes(lu, nodenames)
9151   result = lu.rpc.call_os_validate(nodenames, required, osname,
9152                                    [constants.OS_VALIDATE_PARAMETERS],
9153                                    osparams)
9154   for node, nres in result.items():
9155     # we don't check for offline cases since this should be run only
9156     # against the master node and/or an instance's nodes
9157     nres.Raise("OS Parameters validation failed on node %s" % node)
9158     if not nres.payload:
9159       lu.LogInfo("OS %s not found on node %s, validation skipped",
9160                  osname, node)
9161
9162
9163 class LUInstanceCreate(LogicalUnit):
9164   """Create an instance.
9165
9166   """
9167   HPATH = "instance-add"
9168   HTYPE = constants.HTYPE_INSTANCE
9169   REQ_BGL = False
9170
9171   def CheckArguments(self):
9172     """Check arguments.
9173
9174     """
9175     # do not require name_check to ease forward/backward compatibility
9176     # for tools
9177     if self.op.no_install and self.op.start:
9178       self.LogInfo("No-installation mode selected, disabling startup")
9179       self.op.start = False
9180     # validate/normalize the instance name
9181     self.op.instance_name = \
9182       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9183
9184     if self.op.ip_check and not self.op.name_check:
9185       # TODO: make the ip check more flexible and not depend on the name check
9186       raise errors.OpPrereqError("Cannot do IP address check without a name"
9187                                  " check", errors.ECODE_INVAL)
9188
9189     # check nics' parameter names
9190     for nic in self.op.nics:
9191       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9192
9193     # check disks. parameter names and consistent adopt/no-adopt strategy
9194     has_adopt = has_no_adopt = False
9195     for disk in self.op.disks:
9196       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9197       if constants.IDISK_ADOPT in disk:
9198         has_adopt = True
9199       else:
9200         has_no_adopt = True
9201     if has_adopt and has_no_adopt:
9202       raise errors.OpPrereqError("Either all disks are adopted or none is",
9203                                  errors.ECODE_INVAL)
9204     if has_adopt:
9205       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9206         raise errors.OpPrereqError("Disk adoption is not supported for the"
9207                                    " '%s' disk template" %
9208                                    self.op.disk_template,
9209                                    errors.ECODE_INVAL)
9210       if self.op.iallocator is not None:
9211         raise errors.OpPrereqError("Disk adoption not allowed with an"
9212                                    " iallocator script", errors.ECODE_INVAL)
9213       if self.op.mode == constants.INSTANCE_IMPORT:
9214         raise errors.OpPrereqError("Disk adoption not allowed for"
9215                                    " instance import", errors.ECODE_INVAL)
9216     else:
9217       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9218         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9219                                    " but no 'adopt' parameter given" %
9220                                    self.op.disk_template,
9221                                    errors.ECODE_INVAL)
9222
9223     self.adopt_disks = has_adopt
9224
9225     # instance name verification
9226     if self.op.name_check:
9227       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9228       self.op.instance_name = self.hostname1.name
9229       # used in CheckPrereq for ip ping check
9230       self.check_ip = self.hostname1.ip
9231     else:
9232       self.check_ip = None
9233
9234     # file storage checks
9235     if (self.op.file_driver and
9236         not self.op.file_driver in constants.FILE_DRIVER):
9237       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9238                                  self.op.file_driver, errors.ECODE_INVAL)
9239
9240     if self.op.disk_template == constants.DT_FILE:
9241       opcodes.RequireFileStorage()
9242     elif self.op.disk_template == constants.DT_SHARED_FILE:
9243       opcodes.RequireSharedFileStorage()
9244
9245     ### Node/iallocator related checks
9246     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9247
9248     if self.op.pnode is not None:
9249       if self.op.disk_template in constants.DTS_INT_MIRROR:
9250         if self.op.snode is None:
9251           raise errors.OpPrereqError("The networked disk templates need"
9252                                      " a mirror node", errors.ECODE_INVAL)
9253       elif self.op.snode:
9254         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9255                         " template")
9256         self.op.snode = None
9257
9258     self._cds = _GetClusterDomainSecret()
9259
9260     if self.op.mode == constants.INSTANCE_IMPORT:
9261       # On import force_variant must be True, because if we forced it at
9262       # initial install, our only chance when importing it back is that it
9263       # works again!
9264       self.op.force_variant = True
9265
9266       if self.op.no_install:
9267         self.LogInfo("No-installation mode has no effect during import")
9268
9269     elif self.op.mode == constants.INSTANCE_CREATE:
9270       if self.op.os_type is None:
9271         raise errors.OpPrereqError("No guest OS specified",
9272                                    errors.ECODE_INVAL)
9273       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9274         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9275                                    " installation" % self.op.os_type,
9276                                    errors.ECODE_STATE)
9277       if self.op.disk_template is None:
9278         raise errors.OpPrereqError("No disk template specified",
9279                                    errors.ECODE_INVAL)
9280
9281     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9282       # Check handshake to ensure both clusters have the same domain secret
9283       src_handshake = self.op.source_handshake
9284       if not src_handshake:
9285         raise errors.OpPrereqError("Missing source handshake",
9286                                    errors.ECODE_INVAL)
9287
9288       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9289                                                            src_handshake)
9290       if errmsg:
9291         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9292                                    errors.ECODE_INVAL)
9293
9294       # Load and check source CA
9295       self.source_x509_ca_pem = self.op.source_x509_ca
9296       if not self.source_x509_ca_pem:
9297         raise errors.OpPrereqError("Missing source X509 CA",
9298                                    errors.ECODE_INVAL)
9299
9300       try:
9301         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9302                                                     self._cds)
9303       except OpenSSL.crypto.Error, err:
9304         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9305                                    (err, ), errors.ECODE_INVAL)
9306
9307       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9308       if errcode is not None:
9309         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9310                                    errors.ECODE_INVAL)
9311
9312       self.source_x509_ca = cert
9313
9314       src_instance_name = self.op.source_instance_name
9315       if not src_instance_name:
9316         raise errors.OpPrereqError("Missing source instance name",
9317                                    errors.ECODE_INVAL)
9318
9319       self.source_instance_name = \
9320           netutils.GetHostname(name=src_instance_name).name
9321
9322     else:
9323       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9324                                  self.op.mode, errors.ECODE_INVAL)
9325
9326   def ExpandNames(self):
9327     """ExpandNames for CreateInstance.
9328
9329     Figure out the right locks for instance creation.
9330
9331     """
9332     self.needed_locks = {}
9333
9334     instance_name = self.op.instance_name
9335     # this is just a preventive check, but someone might still add this
9336     # instance in the meantime, and creation will fail at lock-add time
9337     if instance_name in self.cfg.GetInstanceList():
9338       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9339                                  instance_name, errors.ECODE_EXISTS)
9340
9341     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9342
9343     if self.op.iallocator:
9344       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9345       # specifying a group on instance creation and then selecting nodes from
9346       # that group
9347       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9348       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9349     else:
9350       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9351       nodelist = [self.op.pnode]
9352       if self.op.snode is not None:
9353         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9354         nodelist.append(self.op.snode)
9355       self.needed_locks[locking.LEVEL_NODE] = nodelist
9356       # Lock resources of instance's primary and secondary nodes (copy to
9357       # prevent accidential modification)
9358       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9359
9360     # in case of import lock the source node too
9361     if self.op.mode == constants.INSTANCE_IMPORT:
9362       src_node = self.op.src_node
9363       src_path = self.op.src_path
9364
9365       if src_path is None:
9366         self.op.src_path = src_path = self.op.instance_name
9367
9368       if src_node is None:
9369         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9370         self.op.src_node = None
9371         if os.path.isabs(src_path):
9372           raise errors.OpPrereqError("Importing an instance from a path"
9373                                      " requires a source node option",
9374                                      errors.ECODE_INVAL)
9375       else:
9376         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9377         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9378           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9379         if not os.path.isabs(src_path):
9380           self.op.src_path = src_path = \
9381             utils.PathJoin(constants.EXPORT_DIR, src_path)
9382
9383   def _RunAllocator(self):
9384     """Run the allocator based on input opcode.
9385
9386     """
9387     nics = [n.ToDict() for n in self.nics]
9388     ial = IAllocator(self.cfg, self.rpc,
9389                      mode=constants.IALLOCATOR_MODE_ALLOC,
9390                      name=self.op.instance_name,
9391                      disk_template=self.op.disk_template,
9392                      tags=self.op.tags,
9393                      os=self.op.os_type,
9394                      vcpus=self.be_full[constants.BE_VCPUS],
9395                      memory=self.be_full[constants.BE_MAXMEM],
9396                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9397                      disks=self.disks,
9398                      nics=nics,
9399                      hypervisor=self.op.hypervisor,
9400                      )
9401
9402     ial.Run(self.op.iallocator)
9403
9404     if not ial.success:
9405       raise errors.OpPrereqError("Can't compute nodes using"
9406                                  " iallocator '%s': %s" %
9407                                  (self.op.iallocator, ial.info),
9408                                  errors.ECODE_NORES)
9409     if len(ial.result) != ial.required_nodes:
9410       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9411                                  " of nodes (%s), required %s" %
9412                                  (self.op.iallocator, len(ial.result),
9413                                   ial.required_nodes), errors.ECODE_FAULT)
9414     self.op.pnode = ial.result[0]
9415     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9416                  self.op.instance_name, self.op.iallocator,
9417                  utils.CommaJoin(ial.result))
9418     if ial.required_nodes == 2:
9419       self.op.snode = ial.result[1]
9420
9421   def BuildHooksEnv(self):
9422     """Build hooks env.
9423
9424     This runs on master, primary and secondary nodes of the instance.
9425
9426     """
9427     env = {
9428       "ADD_MODE": self.op.mode,
9429       }
9430     if self.op.mode == constants.INSTANCE_IMPORT:
9431       env["SRC_NODE"] = self.op.src_node
9432       env["SRC_PATH"] = self.op.src_path
9433       env["SRC_IMAGES"] = self.src_images
9434
9435     env.update(_BuildInstanceHookEnv(
9436       name=self.op.instance_name,
9437       primary_node=self.op.pnode,
9438       secondary_nodes=self.secondaries,
9439       status=self.op.start,
9440       os_type=self.op.os_type,
9441       minmem=self.be_full[constants.BE_MINMEM],
9442       maxmem=self.be_full[constants.BE_MAXMEM],
9443       vcpus=self.be_full[constants.BE_VCPUS],
9444       nics=_NICListToTuple(self, self.nics),
9445       disk_template=self.op.disk_template,
9446       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9447              for d in self.disks],
9448       bep=self.be_full,
9449       hvp=self.hv_full,
9450       hypervisor_name=self.op.hypervisor,
9451       tags=self.op.tags,
9452     ))
9453
9454     return env
9455
9456   def BuildHooksNodes(self):
9457     """Build hooks nodes.
9458
9459     """
9460     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9461     return nl, nl
9462
9463   def _ReadExportInfo(self):
9464     """Reads the export information from disk.
9465
9466     It will override the opcode source node and path with the actual
9467     information, if these two were not specified before.
9468
9469     @return: the export information
9470
9471     """
9472     assert self.op.mode == constants.INSTANCE_IMPORT
9473
9474     src_node = self.op.src_node
9475     src_path = self.op.src_path
9476
9477     if src_node is None:
9478       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9479       exp_list = self.rpc.call_export_list(locked_nodes)
9480       found = False
9481       for node in exp_list:
9482         if exp_list[node].fail_msg:
9483           continue
9484         if src_path in exp_list[node].payload:
9485           found = True
9486           self.op.src_node = src_node = node
9487           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9488                                                        src_path)
9489           break
9490       if not found:
9491         raise errors.OpPrereqError("No export found for relative path %s" %
9492                                     src_path, errors.ECODE_INVAL)
9493
9494     _CheckNodeOnline(self, src_node)
9495     result = self.rpc.call_export_info(src_node, src_path)
9496     result.Raise("No export or invalid export found in dir %s" % src_path)
9497
9498     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9499     if not export_info.has_section(constants.INISECT_EXP):
9500       raise errors.ProgrammerError("Corrupted export config",
9501                                    errors.ECODE_ENVIRON)
9502
9503     ei_version = export_info.get(constants.INISECT_EXP, "version")
9504     if (int(ei_version) != constants.EXPORT_VERSION):
9505       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9506                                  (ei_version, constants.EXPORT_VERSION),
9507                                  errors.ECODE_ENVIRON)
9508     return export_info
9509
9510   def _ReadExportParams(self, einfo):
9511     """Use export parameters as defaults.
9512
9513     In case the opcode doesn't specify (as in override) some instance
9514     parameters, then try to use them from the export information, if
9515     that declares them.
9516
9517     """
9518     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9519
9520     if self.op.disk_template is None:
9521       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9522         self.op.disk_template = einfo.get(constants.INISECT_INS,
9523                                           "disk_template")
9524         if self.op.disk_template not in constants.DISK_TEMPLATES:
9525           raise errors.OpPrereqError("Disk template specified in configuration"
9526                                      " file is not one of the allowed values:"
9527                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9528       else:
9529         raise errors.OpPrereqError("No disk template specified and the export"
9530                                    " is missing the disk_template information",
9531                                    errors.ECODE_INVAL)
9532
9533     if not self.op.disks:
9534       disks = []
9535       # TODO: import the disk iv_name too
9536       for idx in range(constants.MAX_DISKS):
9537         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9538           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9539           disks.append({constants.IDISK_SIZE: disk_sz})
9540       self.op.disks = disks
9541       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9542         raise errors.OpPrereqError("No disk info specified and the export"
9543                                    " is missing the disk information",
9544                                    errors.ECODE_INVAL)
9545
9546     if not self.op.nics:
9547       nics = []
9548       for idx in range(constants.MAX_NICS):
9549         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9550           ndict = {}
9551           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9552             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9553             ndict[name] = v
9554           nics.append(ndict)
9555         else:
9556           break
9557       self.op.nics = nics
9558
9559     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9560       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9561
9562     if (self.op.hypervisor is None and
9563         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9564       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9565
9566     if einfo.has_section(constants.INISECT_HYP):
9567       # use the export parameters but do not override the ones
9568       # specified by the user
9569       for name, value in einfo.items(constants.INISECT_HYP):
9570         if name not in self.op.hvparams:
9571           self.op.hvparams[name] = value
9572
9573     if einfo.has_section(constants.INISECT_BEP):
9574       # use the parameters, without overriding
9575       for name, value in einfo.items(constants.INISECT_BEP):
9576         if name not in self.op.beparams:
9577           self.op.beparams[name] = value
9578         # Compatibility for the old "memory" be param
9579         if name == constants.BE_MEMORY:
9580           if constants.BE_MAXMEM not in self.op.beparams:
9581             self.op.beparams[constants.BE_MAXMEM] = value
9582           if constants.BE_MINMEM not in self.op.beparams:
9583             self.op.beparams[constants.BE_MINMEM] = value
9584     else:
9585       # try to read the parameters old style, from the main section
9586       for name in constants.BES_PARAMETERS:
9587         if (name not in self.op.beparams and
9588             einfo.has_option(constants.INISECT_INS, name)):
9589           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9590
9591     if einfo.has_section(constants.INISECT_OSP):
9592       # use the parameters, without overriding
9593       for name, value in einfo.items(constants.INISECT_OSP):
9594         if name not in self.op.osparams:
9595           self.op.osparams[name] = value
9596
9597   def _RevertToDefaults(self, cluster):
9598     """Revert the instance parameters to the default values.
9599
9600     """
9601     # hvparams
9602     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9603     for name in self.op.hvparams.keys():
9604       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9605         del self.op.hvparams[name]
9606     # beparams
9607     be_defs = cluster.SimpleFillBE({})
9608     for name in self.op.beparams.keys():
9609       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9610         del self.op.beparams[name]
9611     # nic params
9612     nic_defs = cluster.SimpleFillNIC({})
9613     for nic in self.op.nics:
9614       for name in constants.NICS_PARAMETERS:
9615         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9616           del nic[name]
9617     # osparams
9618     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9619     for name in self.op.osparams.keys():
9620       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9621         del self.op.osparams[name]
9622
9623   def _CalculateFileStorageDir(self):
9624     """Calculate final instance file storage dir.
9625
9626     """
9627     # file storage dir calculation/check
9628     self.instance_file_storage_dir = None
9629     if self.op.disk_template in constants.DTS_FILEBASED:
9630       # build the full file storage dir path
9631       joinargs = []
9632
9633       if self.op.disk_template == constants.DT_SHARED_FILE:
9634         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9635       else:
9636         get_fsd_fn = self.cfg.GetFileStorageDir
9637
9638       cfg_storagedir = get_fsd_fn()
9639       if not cfg_storagedir:
9640         raise errors.OpPrereqError("Cluster file storage dir not defined")
9641       joinargs.append(cfg_storagedir)
9642
9643       if self.op.file_storage_dir is not None:
9644         joinargs.append(self.op.file_storage_dir)
9645
9646       joinargs.append(self.op.instance_name)
9647
9648       # pylint: disable=W0142
9649       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9650
9651   def CheckPrereq(self): # pylint: disable=R0914
9652     """Check prerequisites.
9653
9654     """
9655     self._CalculateFileStorageDir()
9656
9657     if self.op.mode == constants.INSTANCE_IMPORT:
9658       export_info = self._ReadExportInfo()
9659       self._ReadExportParams(export_info)
9660
9661     if (not self.cfg.GetVGName() and
9662         self.op.disk_template not in constants.DTS_NOT_LVM):
9663       raise errors.OpPrereqError("Cluster does not support lvm-based"
9664                                  " instances", errors.ECODE_STATE)
9665
9666     if (self.op.hypervisor is None or
9667         self.op.hypervisor == constants.VALUE_AUTO):
9668       self.op.hypervisor = self.cfg.GetHypervisorType()
9669
9670     cluster = self.cfg.GetClusterInfo()
9671     enabled_hvs = cluster.enabled_hypervisors
9672     if self.op.hypervisor not in enabled_hvs:
9673       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9674                                  " cluster (%s)" % (self.op.hypervisor,
9675                                   ",".join(enabled_hvs)),
9676                                  errors.ECODE_STATE)
9677
9678     # Check tag validity
9679     for tag in self.op.tags:
9680       objects.TaggableObject.ValidateTag(tag)
9681
9682     # check hypervisor parameter syntax (locally)
9683     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9684     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9685                                       self.op.hvparams)
9686     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9687     hv_type.CheckParameterSyntax(filled_hvp)
9688     self.hv_full = filled_hvp
9689     # check that we don't specify global parameters on an instance
9690     _CheckGlobalHvParams(self.op.hvparams)
9691
9692     # fill and remember the beparams dict
9693     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9694     for param, value in self.op.beparams.iteritems():
9695       if value == constants.VALUE_AUTO:
9696         self.op.beparams[param] = default_beparams[param]
9697     objects.UpgradeBeParams(self.op.beparams)
9698     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9699     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9700
9701     # build os parameters
9702     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9703
9704     # now that hvp/bep are in final format, let's reset to defaults,
9705     # if told to do so
9706     if self.op.identify_defaults:
9707       self._RevertToDefaults(cluster)
9708
9709     # NIC buildup
9710     self.nics = []
9711     for idx, nic in enumerate(self.op.nics):
9712       nic_mode_req = nic.get(constants.INIC_MODE, None)
9713       nic_mode = nic_mode_req
9714       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9715         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9716
9717       # in routed mode, for the first nic, the default ip is 'auto'
9718       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9719         default_ip_mode = constants.VALUE_AUTO
9720       else:
9721         default_ip_mode = constants.VALUE_NONE
9722
9723       # ip validity checks
9724       ip = nic.get(constants.INIC_IP, default_ip_mode)
9725       if ip is None or ip.lower() == constants.VALUE_NONE:
9726         nic_ip = None
9727       elif ip.lower() == constants.VALUE_AUTO:
9728         if not self.op.name_check:
9729           raise errors.OpPrereqError("IP address set to auto but name checks"
9730                                      " have been skipped",
9731                                      errors.ECODE_INVAL)
9732         nic_ip = self.hostname1.ip
9733       else:
9734         if not netutils.IPAddress.IsValid(ip):
9735           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9736                                      errors.ECODE_INVAL)
9737         nic_ip = ip
9738
9739       # TODO: check the ip address for uniqueness
9740       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9741         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9742                                    errors.ECODE_INVAL)
9743
9744       # MAC address verification
9745       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9746       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9747         mac = utils.NormalizeAndValidateMac(mac)
9748
9749         try:
9750           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9751         except errors.ReservationError:
9752           raise errors.OpPrereqError("MAC address %s already in use"
9753                                      " in cluster" % mac,
9754                                      errors.ECODE_NOTUNIQUE)
9755
9756       #  Build nic parameters
9757       link = nic.get(constants.INIC_LINK, None)
9758       if link == constants.VALUE_AUTO:
9759         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9760       nicparams = {}
9761       if nic_mode_req:
9762         nicparams[constants.NIC_MODE] = nic_mode
9763       if link:
9764         nicparams[constants.NIC_LINK] = link
9765
9766       check_params = cluster.SimpleFillNIC(nicparams)
9767       objects.NIC.CheckParameterSyntax(check_params)
9768       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9769
9770     # disk checks/pre-build
9771     default_vg = self.cfg.GetVGName()
9772     self.disks = []
9773     for disk in self.op.disks:
9774       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9775       if mode not in constants.DISK_ACCESS_SET:
9776         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9777                                    mode, errors.ECODE_INVAL)
9778       size = disk.get(constants.IDISK_SIZE, None)
9779       if size is None:
9780         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9781       try:
9782         size = int(size)
9783       except (TypeError, ValueError):
9784         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9785                                    errors.ECODE_INVAL)
9786
9787       data_vg = disk.get(constants.IDISK_VG, default_vg)
9788       new_disk = {
9789         constants.IDISK_SIZE: size,
9790         constants.IDISK_MODE: mode,
9791         constants.IDISK_VG: data_vg,
9792         }
9793       if constants.IDISK_METAVG in disk:
9794         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9795       if constants.IDISK_ADOPT in disk:
9796         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9797       self.disks.append(new_disk)
9798
9799     if self.op.mode == constants.INSTANCE_IMPORT:
9800       disk_images = []
9801       for idx in range(len(self.disks)):
9802         option = "disk%d_dump" % idx
9803         if export_info.has_option(constants.INISECT_INS, option):
9804           # FIXME: are the old os-es, disk sizes, etc. useful?
9805           export_name = export_info.get(constants.INISECT_INS, option)
9806           image = utils.PathJoin(self.op.src_path, export_name)
9807           disk_images.append(image)
9808         else:
9809           disk_images.append(False)
9810
9811       self.src_images = disk_images
9812
9813       old_name = export_info.get(constants.INISECT_INS, "name")
9814       if self.op.instance_name == old_name:
9815         for idx, nic in enumerate(self.nics):
9816           if nic.mac == constants.VALUE_AUTO:
9817             nic_mac_ini = "nic%d_mac" % idx
9818             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9819
9820     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9821
9822     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9823     if self.op.ip_check:
9824       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9825         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9826                                    (self.check_ip, self.op.instance_name),
9827                                    errors.ECODE_NOTUNIQUE)
9828
9829     #### mac address generation
9830     # By generating here the mac address both the allocator and the hooks get
9831     # the real final mac address rather than the 'auto' or 'generate' value.
9832     # There is a race condition between the generation and the instance object
9833     # creation, which means that we know the mac is valid now, but we're not
9834     # sure it will be when we actually add the instance. If things go bad
9835     # adding the instance will abort because of a duplicate mac, and the
9836     # creation job will fail.
9837     for nic in self.nics:
9838       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9839         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9840
9841     #### allocator run
9842
9843     if self.op.iallocator is not None:
9844       self._RunAllocator()
9845
9846     # Release all unneeded node locks
9847     _ReleaseLocks(self, locking.LEVEL_NODE,
9848                   keep=filter(None, [self.op.pnode, self.op.snode,
9849                                      self.op.src_node]))
9850     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9851                   keep=filter(None, [self.op.pnode, self.op.snode,
9852                                      self.op.src_node]))
9853
9854     #### node related checks
9855
9856     # check primary node
9857     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9858     assert self.pnode is not None, \
9859       "Cannot retrieve locked node %s" % self.op.pnode
9860     if pnode.offline:
9861       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9862                                  pnode.name, errors.ECODE_STATE)
9863     if pnode.drained:
9864       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9865                                  pnode.name, errors.ECODE_STATE)
9866     if not pnode.vm_capable:
9867       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9868                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9869
9870     self.secondaries = []
9871
9872     # mirror node verification
9873     if self.op.disk_template in constants.DTS_INT_MIRROR:
9874       if self.op.snode == pnode.name:
9875         raise errors.OpPrereqError("The secondary node cannot be the"
9876                                    " primary node", errors.ECODE_INVAL)
9877       _CheckNodeOnline(self, self.op.snode)
9878       _CheckNodeNotDrained(self, self.op.snode)
9879       _CheckNodeVmCapable(self, self.op.snode)
9880       self.secondaries.append(self.op.snode)
9881
9882       snode = self.cfg.GetNodeInfo(self.op.snode)
9883       if pnode.group != snode.group:
9884         self.LogWarning("The primary and secondary nodes are in two"
9885                         " different node groups; the disk parameters"
9886                         " from the first disk's node group will be"
9887                         " used")
9888
9889     nodenames = [pnode.name] + self.secondaries
9890
9891     # Verify instance specs
9892     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9893     ispec = {
9894       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9895       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9896       constants.ISPEC_DISK_COUNT: len(self.disks),
9897       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9898       constants.ISPEC_NIC_COUNT: len(self.nics),
9899       constants.ISPEC_SPINDLE_USE: spindle_use,
9900       }
9901
9902     group_info = self.cfg.GetNodeGroup(pnode.group)
9903     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9904     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9905     if not self.op.ignore_ipolicy and res:
9906       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9907                                   " policy: %s") % (pnode.group,
9908                                                     utils.CommaJoin(res)),
9909                                   errors.ECODE_INVAL)
9910
9911     if not self.adopt_disks:
9912       if self.op.disk_template == constants.DT_RBD:
9913         # _CheckRADOSFreeSpace() is just a placeholder.
9914         # Any function that checks prerequisites can be placed here.
9915         # Check if there is enough space on the RADOS cluster.
9916         _CheckRADOSFreeSpace()
9917       else:
9918         # Check lv size requirements, if not adopting
9919         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9920         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9921
9922     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9923       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9924                                 disk[constants.IDISK_ADOPT])
9925                      for disk in self.disks])
9926       if len(all_lvs) != len(self.disks):
9927         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9928                                    errors.ECODE_INVAL)
9929       for lv_name in all_lvs:
9930         try:
9931           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9932           # to ReserveLV uses the same syntax
9933           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9934         except errors.ReservationError:
9935           raise errors.OpPrereqError("LV named %s used by another instance" %
9936                                      lv_name, errors.ECODE_NOTUNIQUE)
9937
9938       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9939       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9940
9941       node_lvs = self.rpc.call_lv_list([pnode.name],
9942                                        vg_names.payload.keys())[pnode.name]
9943       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9944       node_lvs = node_lvs.payload
9945
9946       delta = all_lvs.difference(node_lvs.keys())
9947       if delta:
9948         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9949                                    utils.CommaJoin(delta),
9950                                    errors.ECODE_INVAL)
9951       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9952       if online_lvs:
9953         raise errors.OpPrereqError("Online logical volumes found, cannot"
9954                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9955                                    errors.ECODE_STATE)
9956       # update the size of disk based on what is found
9957       for dsk in self.disks:
9958         dsk[constants.IDISK_SIZE] = \
9959           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9960                                         dsk[constants.IDISK_ADOPT])][0]))
9961
9962     elif self.op.disk_template == constants.DT_BLOCK:
9963       # Normalize and de-duplicate device paths
9964       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9965                        for disk in self.disks])
9966       if len(all_disks) != len(self.disks):
9967         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9968                                    errors.ECODE_INVAL)
9969       baddisks = [d for d in all_disks
9970                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9971       if baddisks:
9972         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9973                                    " cannot be adopted" %
9974                                    (", ".join(baddisks),
9975                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9976                                    errors.ECODE_INVAL)
9977
9978       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9979                                             list(all_disks))[pnode.name]
9980       node_disks.Raise("Cannot get block device information from node %s" %
9981                        pnode.name)
9982       node_disks = node_disks.payload
9983       delta = all_disks.difference(node_disks.keys())
9984       if delta:
9985         raise errors.OpPrereqError("Missing block device(s): %s" %
9986                                    utils.CommaJoin(delta),
9987                                    errors.ECODE_INVAL)
9988       for dsk in self.disks:
9989         dsk[constants.IDISK_SIZE] = \
9990           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9991
9992     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9993
9994     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9995     # check OS parameters (remotely)
9996     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9997
9998     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9999
10000     # memory check on primary node
10001     #TODO(dynmem): use MINMEM for checking
10002     if self.op.start:
10003       _CheckNodeFreeMemory(self, self.pnode.name,
10004                            "creating instance %s" % self.op.instance_name,
10005                            self.be_full[constants.BE_MAXMEM],
10006                            self.op.hypervisor)
10007
10008     self.dry_run_result = list(nodenames)
10009
10010   def Exec(self, feedback_fn):
10011     """Create and add the instance to the cluster.
10012
10013     """
10014     instance = self.op.instance_name
10015     pnode_name = self.pnode.name
10016
10017     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10018                 self.owned_locks(locking.LEVEL_NODE)), \
10019       "Node locks differ from node resource locks"
10020
10021     ht_kind = self.op.hypervisor
10022     if ht_kind in constants.HTS_REQ_PORT:
10023       network_port = self.cfg.AllocatePort()
10024     else:
10025       network_port = None
10026
10027     # This is ugly but we got a chicken-egg problem here
10028     # We can only take the group disk parameters, as the instance
10029     # has no disks yet (we are generating them right here).
10030     node = self.cfg.GetNodeInfo(pnode_name)
10031     nodegroup = self.cfg.GetNodeGroup(node.group)
10032     disks = _GenerateDiskTemplate(self,
10033                                   self.op.disk_template,
10034                                   instance, pnode_name,
10035                                   self.secondaries,
10036                                   self.disks,
10037                                   self.instance_file_storage_dir,
10038                                   self.op.file_driver,
10039                                   0,
10040                                   feedback_fn,
10041                                   self.cfg.GetGroupDiskParams(nodegroup))
10042
10043     iobj = objects.Instance(name=instance, os=self.op.os_type,
10044                             primary_node=pnode_name,
10045                             nics=self.nics, disks=disks,
10046                             disk_template=self.op.disk_template,
10047                             admin_state=constants.ADMINST_DOWN,
10048                             network_port=network_port,
10049                             beparams=self.op.beparams,
10050                             hvparams=self.op.hvparams,
10051                             hypervisor=self.op.hypervisor,
10052                             osparams=self.op.osparams,
10053                             )
10054
10055     if self.op.tags:
10056       for tag in self.op.tags:
10057         iobj.AddTag(tag)
10058
10059     if self.adopt_disks:
10060       if self.op.disk_template == constants.DT_PLAIN:
10061         # rename LVs to the newly-generated names; we need to construct
10062         # 'fake' LV disks with the old data, plus the new unique_id
10063         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10064         rename_to = []
10065         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10066           rename_to.append(t_dsk.logical_id)
10067           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10068           self.cfg.SetDiskID(t_dsk, pnode_name)
10069         result = self.rpc.call_blockdev_rename(pnode_name,
10070                                                zip(tmp_disks, rename_to))
10071         result.Raise("Failed to rename adoped LVs")
10072     else:
10073       feedback_fn("* creating instance disks...")
10074       try:
10075         _CreateDisks(self, iobj)
10076       except errors.OpExecError:
10077         self.LogWarning("Device creation failed, reverting...")
10078         try:
10079           _RemoveDisks(self, iobj)
10080         finally:
10081           self.cfg.ReleaseDRBDMinors(instance)
10082           raise
10083
10084     feedback_fn("adding instance %s to cluster config" % instance)
10085
10086     self.cfg.AddInstance(iobj, self.proc.GetECId())
10087
10088     # Declare that we don't want to remove the instance lock anymore, as we've
10089     # added the instance to the config
10090     del self.remove_locks[locking.LEVEL_INSTANCE]
10091
10092     if self.op.mode == constants.INSTANCE_IMPORT:
10093       # Release unused nodes
10094       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10095     else:
10096       # Release all nodes
10097       _ReleaseLocks(self, locking.LEVEL_NODE)
10098
10099     disk_abort = False
10100     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10101       feedback_fn("* wiping instance disks...")
10102       try:
10103         _WipeDisks(self, iobj)
10104       except errors.OpExecError, err:
10105         logging.exception("Wiping disks failed")
10106         self.LogWarning("Wiping instance disks failed (%s)", err)
10107         disk_abort = True
10108
10109     if disk_abort:
10110       # Something is already wrong with the disks, don't do anything else
10111       pass
10112     elif self.op.wait_for_sync:
10113       disk_abort = not _WaitForSync(self, iobj)
10114     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10115       # make sure the disks are not degraded (still sync-ing is ok)
10116       feedback_fn("* checking mirrors status")
10117       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10118     else:
10119       disk_abort = False
10120
10121     if disk_abort:
10122       _RemoveDisks(self, iobj)
10123       self.cfg.RemoveInstance(iobj.name)
10124       # Make sure the instance lock gets removed
10125       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10126       raise errors.OpExecError("There are some degraded disks for"
10127                                " this instance")
10128
10129     # Release all node resource locks
10130     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10131
10132     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10133       if self.op.mode == constants.INSTANCE_CREATE:
10134         if not self.op.no_install:
10135           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10136                         not self.op.wait_for_sync)
10137           if pause_sync:
10138             feedback_fn("* pausing disk sync to install instance OS")
10139             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10140                                                               (iobj.disks,
10141                                                                iobj), True)
10142             for idx, success in enumerate(result.payload):
10143               if not success:
10144                 logging.warn("pause-sync of instance %s for disk %d failed",
10145                              instance, idx)
10146
10147           feedback_fn("* running the instance OS create scripts...")
10148           # FIXME: pass debug option from opcode to backend
10149           os_add_result = \
10150             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10151                                           self.op.debug_level)
10152           if pause_sync:
10153             feedback_fn("* resuming disk sync")
10154             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10155                                                               (iobj.disks,
10156                                                                iobj), False)
10157             for idx, success in enumerate(result.payload):
10158               if not success:
10159                 logging.warn("resume-sync of instance %s for disk %d failed",
10160                              instance, idx)
10161
10162           os_add_result.Raise("Could not add os for instance %s"
10163                               " on node %s" % (instance, pnode_name))
10164
10165       elif self.op.mode == constants.INSTANCE_IMPORT:
10166         feedback_fn("* running the instance OS import scripts...")
10167
10168         transfers = []
10169
10170         for idx, image in enumerate(self.src_images):
10171           if not image:
10172             continue
10173
10174           # FIXME: pass debug option from opcode to backend
10175           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10176                                              constants.IEIO_FILE, (image, ),
10177                                              constants.IEIO_SCRIPT,
10178                                              (iobj.disks[idx], idx),
10179                                              None)
10180           transfers.append(dt)
10181
10182         import_result = \
10183           masterd.instance.TransferInstanceData(self, feedback_fn,
10184                                                 self.op.src_node, pnode_name,
10185                                                 self.pnode.secondary_ip,
10186                                                 iobj, transfers)
10187         if not compat.all(import_result):
10188           self.LogWarning("Some disks for instance %s on node %s were not"
10189                           " imported successfully" % (instance, pnode_name))
10190
10191       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10192         feedback_fn("* preparing remote import...")
10193         # The source cluster will stop the instance before attempting to make a
10194         # connection. In some cases stopping an instance can take a long time,
10195         # hence the shutdown timeout is added to the connection timeout.
10196         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10197                            self.op.source_shutdown_timeout)
10198         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10199
10200         assert iobj.primary_node == self.pnode.name
10201         disk_results = \
10202           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10203                                         self.source_x509_ca,
10204                                         self._cds, timeouts)
10205         if not compat.all(disk_results):
10206           # TODO: Should the instance still be started, even if some disks
10207           # failed to import (valid for local imports, too)?
10208           self.LogWarning("Some disks for instance %s on node %s were not"
10209                           " imported successfully" % (instance, pnode_name))
10210
10211         # Run rename script on newly imported instance
10212         assert iobj.name == instance
10213         feedback_fn("Running rename script for %s" % instance)
10214         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10215                                                    self.source_instance_name,
10216                                                    self.op.debug_level)
10217         if result.fail_msg:
10218           self.LogWarning("Failed to run rename script for %s on node"
10219                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10220
10221       else:
10222         # also checked in the prereq part
10223         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10224                                      % self.op.mode)
10225
10226     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10227
10228     if self.op.start:
10229       iobj.admin_state = constants.ADMINST_UP
10230       self.cfg.Update(iobj, feedback_fn)
10231       logging.info("Starting instance %s on node %s", instance, pnode_name)
10232       feedback_fn("* starting instance...")
10233       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10234                                             False)
10235       result.Raise("Could not start instance")
10236
10237     return list(iobj.all_nodes)
10238
10239
10240 def _CheckRADOSFreeSpace():
10241   """Compute disk size requirements inside the RADOS cluster.
10242
10243   """
10244   # For the RADOS cluster we assume there is always enough space.
10245   pass
10246
10247
10248 class LUInstanceConsole(NoHooksLU):
10249   """Connect to an instance's console.
10250
10251   This is somewhat special in that it returns the command line that
10252   you need to run on the master node in order to connect to the
10253   console.
10254
10255   """
10256   REQ_BGL = False
10257
10258   def ExpandNames(self):
10259     self.share_locks = _ShareAll()
10260     self._ExpandAndLockInstance()
10261
10262   def CheckPrereq(self):
10263     """Check prerequisites.
10264
10265     This checks that the instance is in the cluster.
10266
10267     """
10268     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10269     assert self.instance is not None, \
10270       "Cannot retrieve locked instance %s" % self.op.instance_name
10271     _CheckNodeOnline(self, self.instance.primary_node)
10272
10273   def Exec(self, feedback_fn):
10274     """Connect to the console of an instance
10275
10276     """
10277     instance = self.instance
10278     node = instance.primary_node
10279
10280     node_insts = self.rpc.call_instance_list([node],
10281                                              [instance.hypervisor])[node]
10282     node_insts.Raise("Can't get node information from %s" % node)
10283
10284     if instance.name not in node_insts.payload:
10285       if instance.admin_state == constants.ADMINST_UP:
10286         state = constants.INSTST_ERRORDOWN
10287       elif instance.admin_state == constants.ADMINST_DOWN:
10288         state = constants.INSTST_ADMINDOWN
10289       else:
10290         state = constants.INSTST_ADMINOFFLINE
10291       raise errors.OpExecError("Instance %s is not running (state %s)" %
10292                                (instance.name, state))
10293
10294     logging.debug("Connecting to console of %s on %s", instance.name, node)
10295
10296     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10297
10298
10299 def _GetInstanceConsole(cluster, instance):
10300   """Returns console information for an instance.
10301
10302   @type cluster: L{objects.Cluster}
10303   @type instance: L{objects.Instance}
10304   @rtype: dict
10305
10306   """
10307   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10308   # beparams and hvparams are passed separately, to avoid editing the
10309   # instance and then saving the defaults in the instance itself.
10310   hvparams = cluster.FillHV(instance)
10311   beparams = cluster.FillBE(instance)
10312   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10313
10314   assert console.instance == instance.name
10315   assert console.Validate()
10316
10317   return console.ToDict()
10318
10319
10320 class LUInstanceReplaceDisks(LogicalUnit):
10321   """Replace the disks of an instance.
10322
10323   """
10324   HPATH = "mirrors-replace"
10325   HTYPE = constants.HTYPE_INSTANCE
10326   REQ_BGL = False
10327
10328   def CheckArguments(self):
10329     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10330                                   self.op.iallocator)
10331
10332   def ExpandNames(self):
10333     self._ExpandAndLockInstance()
10334
10335     assert locking.LEVEL_NODE not in self.needed_locks
10336     assert locking.LEVEL_NODE_RES not in self.needed_locks
10337     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10338
10339     assert self.op.iallocator is None or self.op.remote_node is None, \
10340       "Conflicting options"
10341
10342     if self.op.remote_node is not None:
10343       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10344
10345       # Warning: do not remove the locking of the new secondary here
10346       # unless DRBD8.AddChildren is changed to work in parallel;
10347       # currently it doesn't since parallel invocations of
10348       # FindUnusedMinor will conflict
10349       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10350       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10351     else:
10352       self.needed_locks[locking.LEVEL_NODE] = []
10353       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10354
10355       if self.op.iallocator is not None:
10356         # iallocator will select a new node in the same group
10357         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10358
10359     self.needed_locks[locking.LEVEL_NODE_RES] = []
10360
10361     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10362                                    self.op.iallocator, self.op.remote_node,
10363                                    self.op.disks, False, self.op.early_release,
10364                                    self.op.ignore_ipolicy)
10365
10366     self.tasklets = [self.replacer]
10367
10368   def DeclareLocks(self, level):
10369     if level == locking.LEVEL_NODEGROUP:
10370       assert self.op.remote_node is None
10371       assert self.op.iallocator is not None
10372       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10373
10374       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10375       # Lock all groups used by instance optimistically; this requires going
10376       # via the node before it's locked, requiring verification later on
10377       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10378         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10379
10380     elif level == locking.LEVEL_NODE:
10381       if self.op.iallocator is not None:
10382         assert self.op.remote_node is None
10383         assert not self.needed_locks[locking.LEVEL_NODE]
10384
10385         # Lock member nodes of all locked groups
10386         self.needed_locks[locking.LEVEL_NODE] = [node_name
10387           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10388           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10389       else:
10390         self._LockInstancesNodes()
10391     elif level == locking.LEVEL_NODE_RES:
10392       # Reuse node locks
10393       self.needed_locks[locking.LEVEL_NODE_RES] = \
10394         self.needed_locks[locking.LEVEL_NODE]
10395
10396   def BuildHooksEnv(self):
10397     """Build hooks env.
10398
10399     This runs on the master, the primary and all the secondaries.
10400
10401     """
10402     instance = self.replacer.instance
10403     env = {
10404       "MODE": self.op.mode,
10405       "NEW_SECONDARY": self.op.remote_node,
10406       "OLD_SECONDARY": instance.secondary_nodes[0],
10407       }
10408     env.update(_BuildInstanceHookEnvByObject(self, instance))
10409     return env
10410
10411   def BuildHooksNodes(self):
10412     """Build hooks nodes.
10413
10414     """
10415     instance = self.replacer.instance
10416     nl = [
10417       self.cfg.GetMasterNode(),
10418       instance.primary_node,
10419       ]
10420     if self.op.remote_node is not None:
10421       nl.append(self.op.remote_node)
10422     return nl, nl
10423
10424   def CheckPrereq(self):
10425     """Check prerequisites.
10426
10427     """
10428     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10429             self.op.iallocator is None)
10430
10431     # Verify if node group locks are still correct
10432     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10433     if owned_groups:
10434       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10435
10436     return LogicalUnit.CheckPrereq(self)
10437
10438
10439 class TLReplaceDisks(Tasklet):
10440   """Replaces disks for an instance.
10441
10442   Note: Locking is not within the scope of this class.
10443
10444   """
10445   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10446                disks, delay_iallocator, early_release, ignore_ipolicy):
10447     """Initializes this class.
10448
10449     """
10450     Tasklet.__init__(self, lu)
10451
10452     # Parameters
10453     self.instance_name = instance_name
10454     self.mode = mode
10455     self.iallocator_name = iallocator_name
10456     self.remote_node = remote_node
10457     self.disks = disks
10458     self.delay_iallocator = delay_iallocator
10459     self.early_release = early_release
10460     self.ignore_ipolicy = ignore_ipolicy
10461
10462     # Runtime data
10463     self.instance = None
10464     self.new_node = None
10465     self.target_node = None
10466     self.other_node = None
10467     self.remote_node_info = None
10468     self.node_secondary_ip = None
10469
10470   @staticmethod
10471   def CheckArguments(mode, remote_node, iallocator):
10472     """Helper function for users of this class.
10473
10474     """
10475     # check for valid parameter combination
10476     if mode == constants.REPLACE_DISK_CHG:
10477       if remote_node is None and iallocator is None:
10478         raise errors.OpPrereqError("When changing the secondary either an"
10479                                    " iallocator script must be used or the"
10480                                    " new node given", errors.ECODE_INVAL)
10481
10482       if remote_node is not None and iallocator is not None:
10483         raise errors.OpPrereqError("Give either the iallocator or the new"
10484                                    " secondary, not both", errors.ECODE_INVAL)
10485
10486     elif remote_node is not None or iallocator is not None:
10487       # Not replacing the secondary
10488       raise errors.OpPrereqError("The iallocator and new node options can"
10489                                  " only be used when changing the"
10490                                  " secondary node", errors.ECODE_INVAL)
10491
10492   @staticmethod
10493   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10494     """Compute a new secondary node using an IAllocator.
10495
10496     """
10497     ial = IAllocator(lu.cfg, lu.rpc,
10498                      mode=constants.IALLOCATOR_MODE_RELOC,
10499                      name=instance_name,
10500                      relocate_from=list(relocate_from))
10501
10502     ial.Run(iallocator_name)
10503
10504     if not ial.success:
10505       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10506                                  " %s" % (iallocator_name, ial.info),
10507                                  errors.ECODE_NORES)
10508
10509     if len(ial.result) != ial.required_nodes:
10510       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10511                                  " of nodes (%s), required %s" %
10512                                  (iallocator_name,
10513                                   len(ial.result), ial.required_nodes),
10514                                  errors.ECODE_FAULT)
10515
10516     remote_node_name = ial.result[0]
10517
10518     lu.LogInfo("Selected new secondary for instance '%s': %s",
10519                instance_name, remote_node_name)
10520
10521     return remote_node_name
10522
10523   def _FindFaultyDisks(self, node_name):
10524     """Wrapper for L{_FindFaultyInstanceDisks}.
10525
10526     """
10527     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10528                                     node_name, True)
10529
10530   def _CheckDisksActivated(self, instance):
10531     """Checks if the instance disks are activated.
10532
10533     @param instance: The instance to check disks
10534     @return: True if they are activated, False otherwise
10535
10536     """
10537     nodes = instance.all_nodes
10538
10539     for idx, dev in enumerate(instance.disks):
10540       for node in nodes:
10541         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10542         self.cfg.SetDiskID(dev, node)
10543
10544         result = _BlockdevFind(self, node, dev, instance)
10545
10546         if result.offline:
10547           continue
10548         elif result.fail_msg or not result.payload:
10549           return False
10550
10551     return True
10552
10553   def CheckPrereq(self):
10554     """Check prerequisites.
10555
10556     This checks that the instance is in the cluster.
10557
10558     """
10559     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10560     assert instance is not None, \
10561       "Cannot retrieve locked instance %s" % self.instance_name
10562
10563     if instance.disk_template != constants.DT_DRBD8:
10564       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10565                                  " instances", errors.ECODE_INVAL)
10566
10567     if len(instance.secondary_nodes) != 1:
10568       raise errors.OpPrereqError("The instance has a strange layout,"
10569                                  " expected one secondary but found %d" %
10570                                  len(instance.secondary_nodes),
10571                                  errors.ECODE_FAULT)
10572
10573     if not self.delay_iallocator:
10574       self._CheckPrereq2()
10575
10576   def _CheckPrereq2(self):
10577     """Check prerequisites, second part.
10578
10579     This function should always be part of CheckPrereq. It was separated and is
10580     now called from Exec because during node evacuation iallocator was only
10581     called with an unmodified cluster model, not taking planned changes into
10582     account.
10583
10584     """
10585     instance = self.instance
10586     secondary_node = instance.secondary_nodes[0]
10587
10588     if self.iallocator_name is None:
10589       remote_node = self.remote_node
10590     else:
10591       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10592                                        instance.name, instance.secondary_nodes)
10593
10594     if remote_node is None:
10595       self.remote_node_info = None
10596     else:
10597       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10598              "Remote node '%s' is not locked" % remote_node
10599
10600       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10601       assert self.remote_node_info is not None, \
10602         "Cannot retrieve locked node %s" % remote_node
10603
10604     if remote_node == self.instance.primary_node:
10605       raise errors.OpPrereqError("The specified node is the primary node of"
10606                                  " the instance", errors.ECODE_INVAL)
10607
10608     if remote_node == secondary_node:
10609       raise errors.OpPrereqError("The specified node is already the"
10610                                  " secondary node of the instance",
10611                                  errors.ECODE_INVAL)
10612
10613     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10614                                     constants.REPLACE_DISK_CHG):
10615       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10616                                  errors.ECODE_INVAL)
10617
10618     if self.mode == constants.REPLACE_DISK_AUTO:
10619       if not self._CheckDisksActivated(instance):
10620         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10621                                    " first" % self.instance_name,
10622                                    errors.ECODE_STATE)
10623       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10624       faulty_secondary = self._FindFaultyDisks(secondary_node)
10625
10626       if faulty_primary and faulty_secondary:
10627         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10628                                    " one node and can not be repaired"
10629                                    " automatically" % self.instance_name,
10630                                    errors.ECODE_STATE)
10631
10632       if faulty_primary:
10633         self.disks = faulty_primary
10634         self.target_node = instance.primary_node
10635         self.other_node = secondary_node
10636         check_nodes = [self.target_node, self.other_node]
10637       elif faulty_secondary:
10638         self.disks = faulty_secondary
10639         self.target_node = secondary_node
10640         self.other_node = instance.primary_node
10641         check_nodes = [self.target_node, self.other_node]
10642       else:
10643         self.disks = []
10644         check_nodes = []
10645
10646     else:
10647       # Non-automatic modes
10648       if self.mode == constants.REPLACE_DISK_PRI:
10649         self.target_node = instance.primary_node
10650         self.other_node = secondary_node
10651         check_nodes = [self.target_node, self.other_node]
10652
10653       elif self.mode == constants.REPLACE_DISK_SEC:
10654         self.target_node = secondary_node
10655         self.other_node = instance.primary_node
10656         check_nodes = [self.target_node, self.other_node]
10657
10658       elif self.mode == constants.REPLACE_DISK_CHG:
10659         self.new_node = remote_node
10660         self.other_node = instance.primary_node
10661         self.target_node = secondary_node
10662         check_nodes = [self.new_node, self.other_node]
10663
10664         _CheckNodeNotDrained(self.lu, remote_node)
10665         _CheckNodeVmCapable(self.lu, remote_node)
10666
10667         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10668         assert old_node_info is not None
10669         if old_node_info.offline and not self.early_release:
10670           # doesn't make sense to delay the release
10671           self.early_release = True
10672           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10673                           " early-release mode", secondary_node)
10674
10675       else:
10676         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10677                                      self.mode)
10678
10679       # If not specified all disks should be replaced
10680       if not self.disks:
10681         self.disks = range(len(self.instance.disks))
10682
10683     # TODO: This is ugly, but right now we can't distinguish between internal
10684     # submitted opcode and external one. We should fix that.
10685     if self.remote_node_info:
10686       # We change the node, lets verify it still meets instance policy
10687       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10688       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10689                                        new_group_info)
10690       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10691                               ignore=self.ignore_ipolicy)
10692
10693     for node in check_nodes:
10694       _CheckNodeOnline(self.lu, node)
10695
10696     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10697                                                           self.other_node,
10698                                                           self.target_node]
10699                               if node_name is not None)
10700
10701     # Release unneeded node and node resource locks
10702     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10703     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10704
10705     # Release any owned node group
10706     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10707       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10708
10709     # Check whether disks are valid
10710     for disk_idx in self.disks:
10711       instance.FindDisk(disk_idx)
10712
10713     # Get secondary node IP addresses
10714     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10715                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10716
10717   def Exec(self, feedback_fn):
10718     """Execute disk replacement.
10719
10720     This dispatches the disk replacement to the appropriate handler.
10721
10722     """
10723     if self.delay_iallocator:
10724       self._CheckPrereq2()
10725
10726     if __debug__:
10727       # Verify owned locks before starting operation
10728       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10729       assert set(owned_nodes) == set(self.node_secondary_ip), \
10730           ("Incorrect node locks, owning %s, expected %s" %
10731            (owned_nodes, self.node_secondary_ip.keys()))
10732       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10733               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10734
10735       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10736       assert list(owned_instances) == [self.instance_name], \
10737           "Instance '%s' not locked" % self.instance_name
10738
10739       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10740           "Should not own any node group lock at this point"
10741
10742     if not self.disks:
10743       feedback_fn("No disks need replacement")
10744       return
10745
10746     feedback_fn("Replacing disk(s) %s for %s" %
10747                 (utils.CommaJoin(self.disks), self.instance.name))
10748
10749     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10750
10751     # Activate the instance disks if we're replacing them on a down instance
10752     if activate_disks:
10753       _StartInstanceDisks(self.lu, self.instance, True)
10754
10755     try:
10756       # Should we replace the secondary node?
10757       if self.new_node is not None:
10758         fn = self._ExecDrbd8Secondary
10759       else:
10760         fn = self._ExecDrbd8DiskOnly
10761
10762       result = fn(feedback_fn)
10763     finally:
10764       # Deactivate the instance disks if we're replacing them on a
10765       # down instance
10766       if activate_disks:
10767         _SafeShutdownInstanceDisks(self.lu, self.instance)
10768
10769     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10770
10771     if __debug__:
10772       # Verify owned locks
10773       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10774       nodes = frozenset(self.node_secondary_ip)
10775       assert ((self.early_release and not owned_nodes) or
10776               (not self.early_release and not (set(owned_nodes) - nodes))), \
10777         ("Not owning the correct locks, early_release=%s, owned=%r,"
10778          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10779
10780     return result
10781
10782   def _CheckVolumeGroup(self, nodes):
10783     self.lu.LogInfo("Checking volume groups")
10784
10785     vgname = self.cfg.GetVGName()
10786
10787     # Make sure volume group exists on all involved nodes
10788     results = self.rpc.call_vg_list(nodes)
10789     if not results:
10790       raise errors.OpExecError("Can't list volume groups on the nodes")
10791
10792     for node in nodes:
10793       res = results[node]
10794       res.Raise("Error checking node %s" % node)
10795       if vgname not in res.payload:
10796         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10797                                  (vgname, node))
10798
10799   def _CheckDisksExistence(self, nodes):
10800     # Check disk existence
10801     for idx, dev in enumerate(self.instance.disks):
10802       if idx not in self.disks:
10803         continue
10804
10805       for node in nodes:
10806         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10807         self.cfg.SetDiskID(dev, node)
10808
10809         result = _BlockdevFind(self, node, dev, self.instance)
10810
10811         msg = result.fail_msg
10812         if msg or not result.payload:
10813           if not msg:
10814             msg = "disk not found"
10815           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10816                                    (idx, node, msg))
10817
10818   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10819     for idx, dev in enumerate(self.instance.disks):
10820       if idx not in self.disks:
10821         continue
10822
10823       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10824                       (idx, node_name))
10825
10826       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10827                                    on_primary, ldisk=ldisk):
10828         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10829                                  " replace disks for instance %s" %
10830                                  (node_name, self.instance.name))
10831
10832   def _CreateNewStorage(self, node_name):
10833     """Create new storage on the primary or secondary node.
10834
10835     This is only used for same-node replaces, not for changing the
10836     secondary node, hence we don't want to modify the existing disk.
10837
10838     """
10839     iv_names = {}
10840
10841     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10842     for idx, dev in enumerate(disks):
10843       if idx not in self.disks:
10844         continue
10845
10846       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10847
10848       self.cfg.SetDiskID(dev, node_name)
10849
10850       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10851       names = _GenerateUniqueNames(self.lu, lv_names)
10852
10853       (data_disk, meta_disk) = dev.children
10854       vg_data = data_disk.logical_id[0]
10855       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10856                              logical_id=(vg_data, names[0]),
10857                              params=data_disk.params)
10858       vg_meta = meta_disk.logical_id[0]
10859       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10860                              logical_id=(vg_meta, names[1]),
10861                              params=meta_disk.params)
10862
10863       new_lvs = [lv_data, lv_meta]
10864       old_lvs = [child.Copy() for child in dev.children]
10865       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10866
10867       # we pass force_create=True to force the LVM creation
10868       for new_lv in new_lvs:
10869         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10870                              _GetInstanceInfoText(self.instance), False)
10871
10872     return iv_names
10873
10874   def _CheckDevices(self, node_name, iv_names):
10875     for name, (dev, _, _) in iv_names.iteritems():
10876       self.cfg.SetDiskID(dev, node_name)
10877
10878       result = _BlockdevFind(self, node_name, dev, self.instance)
10879
10880       msg = result.fail_msg
10881       if msg or not result.payload:
10882         if not msg:
10883           msg = "disk not found"
10884         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10885                                  (name, msg))
10886
10887       if result.payload.is_degraded:
10888         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10889
10890   def _RemoveOldStorage(self, node_name, iv_names):
10891     for name, (_, old_lvs, _) in iv_names.iteritems():
10892       self.lu.LogInfo("Remove logical volumes for %s" % name)
10893
10894       for lv in old_lvs:
10895         self.cfg.SetDiskID(lv, node_name)
10896
10897         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10898         if msg:
10899           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10900                              hint="remove unused LVs manually")
10901
10902   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10903     """Replace a disk on the primary or secondary for DRBD 8.
10904
10905     The algorithm for replace is quite complicated:
10906
10907       1. for each disk to be replaced:
10908
10909         1. create new LVs on the target node with unique names
10910         1. detach old LVs from the drbd device
10911         1. rename old LVs to name_replaced.<time_t>
10912         1. rename new LVs to old LVs
10913         1. attach the new LVs (with the old names now) to the drbd device
10914
10915       1. wait for sync across all devices
10916
10917       1. for each modified disk:
10918
10919         1. remove old LVs (which have the name name_replaces.<time_t>)
10920
10921     Failures are not very well handled.
10922
10923     """
10924     steps_total = 6
10925
10926     # Step: check device activation
10927     self.lu.LogStep(1, steps_total, "Check device existence")
10928     self._CheckDisksExistence([self.other_node, self.target_node])
10929     self._CheckVolumeGroup([self.target_node, self.other_node])
10930
10931     # Step: check other node consistency
10932     self.lu.LogStep(2, steps_total, "Check peer consistency")
10933     self._CheckDisksConsistency(self.other_node,
10934                                 self.other_node == self.instance.primary_node,
10935                                 False)
10936
10937     # Step: create new storage
10938     self.lu.LogStep(3, steps_total, "Allocate new storage")
10939     iv_names = self._CreateNewStorage(self.target_node)
10940
10941     # Step: for each lv, detach+rename*2+attach
10942     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10943     for dev, old_lvs, new_lvs in iv_names.itervalues():
10944       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10945
10946       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10947                                                      old_lvs)
10948       result.Raise("Can't detach drbd from local storage on node"
10949                    " %s for device %s" % (self.target_node, dev.iv_name))
10950       #dev.children = []
10951       #cfg.Update(instance)
10952
10953       # ok, we created the new LVs, so now we know we have the needed
10954       # storage; as such, we proceed on the target node to rename
10955       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10956       # using the assumption that logical_id == physical_id (which in
10957       # turn is the unique_id on that node)
10958
10959       # FIXME(iustin): use a better name for the replaced LVs
10960       temp_suffix = int(time.time())
10961       ren_fn = lambda d, suff: (d.physical_id[0],
10962                                 d.physical_id[1] + "_replaced-%s" % suff)
10963
10964       # Build the rename list based on what LVs exist on the node
10965       rename_old_to_new = []
10966       for to_ren in old_lvs:
10967         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10968         if not result.fail_msg and result.payload:
10969           # device exists
10970           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10971
10972       self.lu.LogInfo("Renaming the old LVs on the target node")
10973       result = self.rpc.call_blockdev_rename(self.target_node,
10974                                              rename_old_to_new)
10975       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10976
10977       # Now we rename the new LVs to the old LVs
10978       self.lu.LogInfo("Renaming the new LVs on the target node")
10979       rename_new_to_old = [(new, old.physical_id)
10980                            for old, new in zip(old_lvs, new_lvs)]
10981       result = self.rpc.call_blockdev_rename(self.target_node,
10982                                              rename_new_to_old)
10983       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10984
10985       # Intermediate steps of in memory modifications
10986       for old, new in zip(old_lvs, new_lvs):
10987         new.logical_id = old.logical_id
10988         self.cfg.SetDiskID(new, self.target_node)
10989
10990       # We need to modify old_lvs so that removal later removes the
10991       # right LVs, not the newly added ones; note that old_lvs is a
10992       # copy here
10993       for disk in old_lvs:
10994         disk.logical_id = ren_fn(disk, temp_suffix)
10995         self.cfg.SetDiskID(disk, self.target_node)
10996
10997       # Now that the new lvs have the old name, we can add them to the device
10998       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10999       result = self.rpc.call_blockdev_addchildren(self.target_node,
11000                                                   (dev, self.instance), new_lvs)
11001       msg = result.fail_msg
11002       if msg:
11003         for new_lv in new_lvs:
11004           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11005                                                new_lv).fail_msg
11006           if msg2:
11007             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11008                                hint=("cleanup manually the unused logical"
11009                                      "volumes"))
11010         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11011
11012     cstep = itertools.count(5)
11013
11014     if self.early_release:
11015       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11016       self._RemoveOldStorage(self.target_node, iv_names)
11017       # TODO: Check if releasing locks early still makes sense
11018       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11019     else:
11020       # Release all resource locks except those used by the instance
11021       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11022                     keep=self.node_secondary_ip.keys())
11023
11024     # Release all node locks while waiting for sync
11025     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11026
11027     # TODO: Can the instance lock be downgraded here? Take the optional disk
11028     # shutdown in the caller into consideration.
11029
11030     # Wait for sync
11031     # This can fail as the old devices are degraded and _WaitForSync
11032     # does a combined result over all disks, so we don't check its return value
11033     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11034     _WaitForSync(self.lu, self.instance)
11035
11036     # Check all devices manually
11037     self._CheckDevices(self.instance.primary_node, iv_names)
11038
11039     # Step: remove old storage
11040     if not self.early_release:
11041       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11042       self._RemoveOldStorage(self.target_node, iv_names)
11043
11044   def _ExecDrbd8Secondary(self, feedback_fn):
11045     """Replace the secondary node for DRBD 8.
11046
11047     The algorithm for replace is quite complicated:
11048       - for all disks of the instance:
11049         - create new LVs on the new node with same names
11050         - shutdown the drbd device on the old secondary
11051         - disconnect the drbd network on the primary
11052         - create the drbd device on the new secondary
11053         - network attach the drbd on the primary, using an artifice:
11054           the drbd code for Attach() will connect to the network if it
11055           finds a device which is connected to the good local disks but
11056           not network enabled
11057       - wait for sync across all devices
11058       - remove all disks from the old secondary
11059
11060     Failures are not very well handled.
11061
11062     """
11063     steps_total = 6
11064
11065     pnode = self.instance.primary_node
11066
11067     # Step: check device activation
11068     self.lu.LogStep(1, steps_total, "Check device existence")
11069     self._CheckDisksExistence([self.instance.primary_node])
11070     self._CheckVolumeGroup([self.instance.primary_node])
11071
11072     # Step: check other node consistency
11073     self.lu.LogStep(2, steps_total, "Check peer consistency")
11074     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11075
11076     # Step: create new storage
11077     self.lu.LogStep(3, steps_total, "Allocate new storage")
11078     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11079     for idx, dev in enumerate(disks):
11080       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11081                       (self.new_node, idx))
11082       # we pass force_create=True to force LVM creation
11083       for new_lv in dev.children:
11084         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11085                              True, _GetInstanceInfoText(self.instance), False)
11086
11087     # Step 4: dbrd minors and drbd setups changes
11088     # after this, we must manually remove the drbd minors on both the
11089     # error and the success paths
11090     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11091     minors = self.cfg.AllocateDRBDMinor([self.new_node
11092                                          for dev in self.instance.disks],
11093                                         self.instance.name)
11094     logging.debug("Allocated minors %r", minors)
11095
11096     iv_names = {}
11097     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11098       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11099                       (self.new_node, idx))
11100       # create new devices on new_node; note that we create two IDs:
11101       # one without port, so the drbd will be activated without
11102       # networking information on the new node at this stage, and one
11103       # with network, for the latter activation in step 4
11104       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11105       if self.instance.primary_node == o_node1:
11106         p_minor = o_minor1
11107       else:
11108         assert self.instance.primary_node == o_node2, "Three-node instance?"
11109         p_minor = o_minor2
11110
11111       new_alone_id = (self.instance.primary_node, self.new_node, None,
11112                       p_minor, new_minor, o_secret)
11113       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11114                     p_minor, new_minor, o_secret)
11115
11116       iv_names[idx] = (dev, dev.children, new_net_id)
11117       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11118                     new_net_id)
11119       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11120                               logical_id=new_alone_id,
11121                               children=dev.children,
11122                               size=dev.size,
11123                               params={})
11124       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11125                                              self.cfg)
11126       try:
11127         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11128                               anno_new_drbd,
11129                               _GetInstanceInfoText(self.instance), False)
11130       except errors.GenericError:
11131         self.cfg.ReleaseDRBDMinors(self.instance.name)
11132         raise
11133
11134     # We have new devices, shutdown the drbd on the old secondary
11135     for idx, dev in enumerate(self.instance.disks):
11136       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11137       self.cfg.SetDiskID(dev, self.target_node)
11138       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11139       if msg:
11140         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11141                            "node: %s" % (idx, msg),
11142                            hint=("Please cleanup this device manually as"
11143                                  " soon as possible"))
11144
11145     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11146     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11147                                                self.instance.disks)[pnode]
11148
11149     msg = result.fail_msg
11150     if msg:
11151       # detaches didn't succeed (unlikely)
11152       self.cfg.ReleaseDRBDMinors(self.instance.name)
11153       raise errors.OpExecError("Can't detach the disks from the network on"
11154                                " old node: %s" % (msg,))
11155
11156     # if we managed to detach at least one, we update all the disks of
11157     # the instance to point to the new secondary
11158     self.lu.LogInfo("Updating instance configuration")
11159     for dev, _, new_logical_id in iv_names.itervalues():
11160       dev.logical_id = new_logical_id
11161       self.cfg.SetDiskID(dev, self.instance.primary_node)
11162
11163     self.cfg.Update(self.instance, feedback_fn)
11164
11165     # Release all node locks (the configuration has been updated)
11166     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11167
11168     # and now perform the drbd attach
11169     self.lu.LogInfo("Attaching primary drbds to new secondary"
11170                     " (standalone => connected)")
11171     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11172                                             self.new_node],
11173                                            self.node_secondary_ip,
11174                                            (self.instance.disks, self.instance),
11175                                            self.instance.name,
11176                                            False)
11177     for to_node, to_result in result.items():
11178       msg = to_result.fail_msg
11179       if msg:
11180         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11181                            to_node, msg,
11182                            hint=("please do a gnt-instance info to see the"
11183                                  " status of disks"))
11184
11185     cstep = itertools.count(5)
11186
11187     if self.early_release:
11188       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11189       self._RemoveOldStorage(self.target_node, iv_names)
11190       # TODO: Check if releasing locks early still makes sense
11191       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11192     else:
11193       # Release all resource locks except those used by the instance
11194       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11195                     keep=self.node_secondary_ip.keys())
11196
11197     # TODO: Can the instance lock be downgraded here? Take the optional disk
11198     # shutdown in the caller into consideration.
11199
11200     # Wait for sync
11201     # This can fail as the old devices are degraded and _WaitForSync
11202     # does a combined result over all disks, so we don't check its return value
11203     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11204     _WaitForSync(self.lu, self.instance)
11205
11206     # Check all devices manually
11207     self._CheckDevices(self.instance.primary_node, iv_names)
11208
11209     # Step: remove old storage
11210     if not self.early_release:
11211       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11212       self._RemoveOldStorage(self.target_node, iv_names)
11213
11214
11215 class LURepairNodeStorage(NoHooksLU):
11216   """Repairs the volume group on a node.
11217
11218   """
11219   REQ_BGL = False
11220
11221   def CheckArguments(self):
11222     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11223
11224     storage_type = self.op.storage_type
11225
11226     if (constants.SO_FIX_CONSISTENCY not in
11227         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11228       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11229                                  " repaired" % storage_type,
11230                                  errors.ECODE_INVAL)
11231
11232   def ExpandNames(self):
11233     self.needed_locks = {
11234       locking.LEVEL_NODE: [self.op.node_name],
11235       }
11236
11237   def _CheckFaultyDisks(self, instance, node_name):
11238     """Ensure faulty disks abort the opcode or at least warn."""
11239     try:
11240       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11241                                   node_name, True):
11242         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11243                                    " node '%s'" % (instance.name, node_name),
11244                                    errors.ECODE_STATE)
11245     except errors.OpPrereqError, err:
11246       if self.op.ignore_consistency:
11247         self.proc.LogWarning(str(err.args[0]))
11248       else:
11249         raise
11250
11251   def CheckPrereq(self):
11252     """Check prerequisites.
11253
11254     """
11255     # Check whether any instance on this node has faulty disks
11256     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11257       if inst.admin_state != constants.ADMINST_UP:
11258         continue
11259       check_nodes = set(inst.all_nodes)
11260       check_nodes.discard(self.op.node_name)
11261       for inst_node_name in check_nodes:
11262         self._CheckFaultyDisks(inst, inst_node_name)
11263
11264   def Exec(self, feedback_fn):
11265     feedback_fn("Repairing storage unit '%s' on %s ..." %
11266                 (self.op.name, self.op.node_name))
11267
11268     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11269     result = self.rpc.call_storage_execute(self.op.node_name,
11270                                            self.op.storage_type, st_args,
11271                                            self.op.name,
11272                                            constants.SO_FIX_CONSISTENCY)
11273     result.Raise("Failed to repair storage unit '%s' on %s" %
11274                  (self.op.name, self.op.node_name))
11275
11276
11277 class LUNodeEvacuate(NoHooksLU):
11278   """Evacuates instances off a list of nodes.
11279
11280   """
11281   REQ_BGL = False
11282
11283   _MODE2IALLOCATOR = {
11284     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11285     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11286     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11287     }
11288   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11289   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11290           constants.IALLOCATOR_NEVAC_MODES)
11291
11292   def CheckArguments(self):
11293     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11294
11295   def ExpandNames(self):
11296     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11297
11298     if self.op.remote_node is not None:
11299       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11300       assert self.op.remote_node
11301
11302       if self.op.remote_node == self.op.node_name:
11303         raise errors.OpPrereqError("Can not use evacuated node as a new"
11304                                    " secondary node", errors.ECODE_INVAL)
11305
11306       if self.op.mode != constants.NODE_EVAC_SEC:
11307         raise errors.OpPrereqError("Without the use of an iallocator only"
11308                                    " secondary instances can be evacuated",
11309                                    errors.ECODE_INVAL)
11310
11311     # Declare locks
11312     self.share_locks = _ShareAll()
11313     self.needed_locks = {
11314       locking.LEVEL_INSTANCE: [],
11315       locking.LEVEL_NODEGROUP: [],
11316       locking.LEVEL_NODE: [],
11317       }
11318
11319     # Determine nodes (via group) optimistically, needs verification once locks
11320     # have been acquired
11321     self.lock_nodes = self._DetermineNodes()
11322
11323   def _DetermineNodes(self):
11324     """Gets the list of nodes to operate on.
11325
11326     """
11327     if self.op.remote_node is None:
11328       # Iallocator will choose any node(s) in the same group
11329       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11330     else:
11331       group_nodes = frozenset([self.op.remote_node])
11332
11333     # Determine nodes to be locked
11334     return set([self.op.node_name]) | group_nodes
11335
11336   def _DetermineInstances(self):
11337     """Builds list of instances to operate on.
11338
11339     """
11340     assert self.op.mode in constants.NODE_EVAC_MODES
11341
11342     if self.op.mode == constants.NODE_EVAC_PRI:
11343       # Primary instances only
11344       inst_fn = _GetNodePrimaryInstances
11345       assert self.op.remote_node is None, \
11346         "Evacuating primary instances requires iallocator"
11347     elif self.op.mode == constants.NODE_EVAC_SEC:
11348       # Secondary instances only
11349       inst_fn = _GetNodeSecondaryInstances
11350     else:
11351       # All instances
11352       assert self.op.mode == constants.NODE_EVAC_ALL
11353       inst_fn = _GetNodeInstances
11354       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11355       # per instance
11356       raise errors.OpPrereqError("Due to an issue with the iallocator"
11357                                  " interface it is not possible to evacuate"
11358                                  " all instances at once; specify explicitly"
11359                                  " whether to evacuate primary or secondary"
11360                                  " instances",
11361                                  errors.ECODE_INVAL)
11362
11363     return inst_fn(self.cfg, self.op.node_name)
11364
11365   def DeclareLocks(self, level):
11366     if level == locking.LEVEL_INSTANCE:
11367       # Lock instances optimistically, needs verification once node and group
11368       # locks have been acquired
11369       self.needed_locks[locking.LEVEL_INSTANCE] = \
11370         set(i.name for i in self._DetermineInstances())
11371
11372     elif level == locking.LEVEL_NODEGROUP:
11373       # Lock node groups for all potential target nodes optimistically, needs
11374       # verification once nodes have been acquired
11375       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11376         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11377
11378     elif level == locking.LEVEL_NODE:
11379       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11380
11381   def CheckPrereq(self):
11382     # Verify locks
11383     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11384     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11385     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11386
11387     need_nodes = self._DetermineNodes()
11388
11389     if not owned_nodes.issuperset(need_nodes):
11390       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11391                                  " locks were acquired, current nodes are"
11392                                  " are '%s', used to be '%s'; retry the"
11393                                  " operation" %
11394                                  (self.op.node_name,
11395                                   utils.CommaJoin(need_nodes),
11396                                   utils.CommaJoin(owned_nodes)),
11397                                  errors.ECODE_STATE)
11398
11399     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11400     if owned_groups != wanted_groups:
11401       raise errors.OpExecError("Node groups changed since locks were acquired,"
11402                                " current groups are '%s', used to be '%s';"
11403                                " retry the operation" %
11404                                (utils.CommaJoin(wanted_groups),
11405                                 utils.CommaJoin(owned_groups)))
11406
11407     # Determine affected instances
11408     self.instances = self._DetermineInstances()
11409     self.instance_names = [i.name for i in self.instances]
11410
11411     if set(self.instance_names) != owned_instances:
11412       raise errors.OpExecError("Instances on node '%s' changed since locks"
11413                                " were acquired, current instances are '%s',"
11414                                " used to be '%s'; retry the operation" %
11415                                (self.op.node_name,
11416                                 utils.CommaJoin(self.instance_names),
11417                                 utils.CommaJoin(owned_instances)))
11418
11419     if self.instance_names:
11420       self.LogInfo("Evacuating instances from node '%s': %s",
11421                    self.op.node_name,
11422                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11423     else:
11424       self.LogInfo("No instances to evacuate from node '%s'",
11425                    self.op.node_name)
11426
11427     if self.op.remote_node is not None:
11428       for i in self.instances:
11429         if i.primary_node == self.op.remote_node:
11430           raise errors.OpPrereqError("Node %s is the primary node of"
11431                                      " instance %s, cannot use it as"
11432                                      " secondary" %
11433                                      (self.op.remote_node, i.name),
11434                                      errors.ECODE_INVAL)
11435
11436   def Exec(self, feedback_fn):
11437     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11438
11439     if not self.instance_names:
11440       # No instances to evacuate
11441       jobs = []
11442
11443     elif self.op.iallocator is not None:
11444       # TODO: Implement relocation to other group
11445       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11446                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11447                        instances=list(self.instance_names))
11448
11449       ial.Run(self.op.iallocator)
11450
11451       if not ial.success:
11452         raise errors.OpPrereqError("Can't compute node evacuation using"
11453                                    " iallocator '%s': %s" %
11454                                    (self.op.iallocator, ial.info),
11455                                    errors.ECODE_NORES)
11456
11457       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11458
11459     elif self.op.remote_node is not None:
11460       assert self.op.mode == constants.NODE_EVAC_SEC
11461       jobs = [
11462         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11463                                         remote_node=self.op.remote_node,
11464                                         disks=[],
11465                                         mode=constants.REPLACE_DISK_CHG,
11466                                         early_release=self.op.early_release)]
11467         for instance_name in self.instance_names
11468         ]
11469
11470     else:
11471       raise errors.ProgrammerError("No iallocator or remote node")
11472
11473     return ResultWithJobs(jobs)
11474
11475
11476 def _SetOpEarlyRelease(early_release, op):
11477   """Sets C{early_release} flag on opcodes if available.
11478
11479   """
11480   try:
11481     op.early_release = early_release
11482   except AttributeError:
11483     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11484
11485   return op
11486
11487
11488 def _NodeEvacDest(use_nodes, group, nodes):
11489   """Returns group or nodes depending on caller's choice.
11490
11491   """
11492   if use_nodes:
11493     return utils.CommaJoin(nodes)
11494   else:
11495     return group
11496
11497
11498 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11499   """Unpacks the result of change-group and node-evacuate iallocator requests.
11500
11501   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11502   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11503
11504   @type lu: L{LogicalUnit}
11505   @param lu: Logical unit instance
11506   @type alloc_result: tuple/list
11507   @param alloc_result: Result from iallocator
11508   @type early_release: bool
11509   @param early_release: Whether to release locks early if possible
11510   @type use_nodes: bool
11511   @param use_nodes: Whether to display node names instead of groups
11512
11513   """
11514   (moved, failed, jobs) = alloc_result
11515
11516   if failed:
11517     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11518                                  for (name, reason) in failed)
11519     lu.LogWarning("Unable to evacuate instances %s", failreason)
11520     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11521
11522   if moved:
11523     lu.LogInfo("Instances to be moved: %s",
11524                utils.CommaJoin("%s (to %s)" %
11525                                (name, _NodeEvacDest(use_nodes, group, nodes))
11526                                for (name, group, nodes) in moved))
11527
11528   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11529               map(opcodes.OpCode.LoadOpCode, ops))
11530           for ops in jobs]
11531
11532
11533 class LUInstanceGrowDisk(LogicalUnit):
11534   """Grow a disk of an instance.
11535
11536   """
11537   HPATH = "disk-grow"
11538   HTYPE = constants.HTYPE_INSTANCE
11539   REQ_BGL = False
11540
11541   def ExpandNames(self):
11542     self._ExpandAndLockInstance()
11543     self.needed_locks[locking.LEVEL_NODE] = []
11544     self.needed_locks[locking.LEVEL_NODE_RES] = []
11545     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11546     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11547
11548   def DeclareLocks(self, level):
11549     if level == locking.LEVEL_NODE:
11550       self._LockInstancesNodes()
11551     elif level == locking.LEVEL_NODE_RES:
11552       # Copy node locks
11553       self.needed_locks[locking.LEVEL_NODE_RES] = \
11554         self.needed_locks[locking.LEVEL_NODE][:]
11555
11556   def BuildHooksEnv(self):
11557     """Build hooks env.
11558
11559     This runs on the master, the primary and all the secondaries.
11560
11561     """
11562     env = {
11563       "DISK": self.op.disk,
11564       "AMOUNT": self.op.amount,
11565       "ABSOLUTE": self.op.absolute,
11566       }
11567     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11568     return env
11569
11570   def BuildHooksNodes(self):
11571     """Build hooks nodes.
11572
11573     """
11574     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11575     return (nl, nl)
11576
11577   def CheckPrereq(self):
11578     """Check prerequisites.
11579
11580     This checks that the instance is in the cluster.
11581
11582     """
11583     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11584     assert instance is not None, \
11585       "Cannot retrieve locked instance %s" % self.op.instance_name
11586     nodenames = list(instance.all_nodes)
11587     for node in nodenames:
11588       _CheckNodeOnline(self, node)
11589
11590     self.instance = instance
11591
11592     if instance.disk_template not in constants.DTS_GROWABLE:
11593       raise errors.OpPrereqError("Instance's disk layout does not support"
11594                                  " growing", errors.ECODE_INVAL)
11595
11596     self.disk = instance.FindDisk(self.op.disk)
11597
11598     if self.op.absolute:
11599       self.target = self.op.amount
11600       self.delta = self.target - self.disk.size
11601       if self.delta < 0:
11602         raise errors.OpPrereqError("Requested size (%s) is smaller than "
11603                                    "current disk size (%s)" %
11604                                    (utils.FormatUnit(self.target, "h"),
11605                                     utils.FormatUnit(self.disk.size, "h")),
11606                                    errors.ECODE_STATE)
11607     else:
11608       self.delta = self.op.amount
11609       self.target = self.disk.size + self.delta
11610       if self.delta < 0:
11611         raise errors.OpPrereqError("Requested increment (%s) is negative" %
11612                                    utils.FormatUnit(self.delta, "h"),
11613                                    errors.ECODE_INVAL)
11614
11615     if instance.disk_template not in (constants.DT_FILE,
11616                                       constants.DT_SHARED_FILE,
11617                                       constants.DT_RBD):
11618       # TODO: check the free disk space for file, when that feature will be
11619       # supported
11620       _CheckNodesFreeDiskPerVG(self, nodenames,
11621                                self.disk.ComputeGrowth(self.delta))
11622
11623   def Exec(self, feedback_fn):
11624     """Execute disk grow.
11625
11626     """
11627     instance = self.instance
11628     disk = self.disk
11629
11630     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11631     assert (self.owned_locks(locking.LEVEL_NODE) ==
11632             self.owned_locks(locking.LEVEL_NODE_RES))
11633
11634     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11635     if not disks_ok:
11636       raise errors.OpExecError("Cannot activate block device to grow")
11637
11638     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11639                 (self.op.disk, instance.name,
11640                  utils.FormatUnit(self.delta, "h"),
11641                  utils.FormatUnit(self.target, "h")))
11642
11643     # First run all grow ops in dry-run mode
11644     for node in instance.all_nodes:
11645       self.cfg.SetDiskID(disk, node)
11646       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11647                                            True)
11648       result.Raise("Grow request failed to node %s" % node)
11649
11650     # We know that (as far as we can test) operations across different
11651     # nodes will succeed, time to run it for real
11652     for node in instance.all_nodes:
11653       self.cfg.SetDiskID(disk, node)
11654       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11655                                            False)
11656       result.Raise("Grow request failed to node %s" % node)
11657
11658       # TODO: Rewrite code to work properly
11659       # DRBD goes into sync mode for a short amount of time after executing the
11660       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11661       # calling "resize" in sync mode fails. Sleeping for a short amount of
11662       # time is a work-around.
11663       time.sleep(5)
11664
11665     disk.RecordGrow(self.delta)
11666     self.cfg.Update(instance, feedback_fn)
11667
11668     # Changes have been recorded, release node lock
11669     _ReleaseLocks(self, locking.LEVEL_NODE)
11670
11671     # Downgrade lock while waiting for sync
11672     self.glm.downgrade(locking.LEVEL_INSTANCE)
11673
11674     if self.op.wait_for_sync:
11675       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11676       if disk_abort:
11677         self.proc.LogWarning("Disk sync-ing has not returned a good"
11678                              " status; please check the instance")
11679       if instance.admin_state != constants.ADMINST_UP:
11680         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11681     elif instance.admin_state != constants.ADMINST_UP:
11682       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11683                            " not supposed to be running because no wait for"
11684                            " sync mode was requested")
11685
11686     assert self.owned_locks(locking.LEVEL_NODE_RES)
11687     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11688
11689
11690 class LUInstanceQueryData(NoHooksLU):
11691   """Query runtime instance data.
11692
11693   """
11694   REQ_BGL = False
11695
11696   def ExpandNames(self):
11697     self.needed_locks = {}
11698
11699     # Use locking if requested or when non-static information is wanted
11700     if not (self.op.static or self.op.use_locking):
11701       self.LogWarning("Non-static data requested, locks need to be acquired")
11702       self.op.use_locking = True
11703
11704     if self.op.instances or not self.op.use_locking:
11705       # Expand instance names right here
11706       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11707     else:
11708       # Will use acquired locks
11709       self.wanted_names = None
11710
11711     if self.op.use_locking:
11712       self.share_locks = _ShareAll()
11713
11714       if self.wanted_names is None:
11715         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11716       else:
11717         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11718
11719       self.needed_locks[locking.LEVEL_NODEGROUP] = []
11720       self.needed_locks[locking.LEVEL_NODE] = []
11721       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11722
11723   def DeclareLocks(self, level):
11724     if self.op.use_locking:
11725       if level == locking.LEVEL_NODEGROUP:
11726         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11727
11728         # Lock all groups used by instances optimistically; this requires going
11729         # via the node before it's locked, requiring verification later on
11730         self.needed_locks[locking.LEVEL_NODEGROUP] = \
11731           frozenset(group_uuid
11732                     for instance_name in owned_instances
11733                     for group_uuid in
11734                       self.cfg.GetInstanceNodeGroups(instance_name))
11735
11736       elif level == locking.LEVEL_NODE:
11737         self._LockInstancesNodes()
11738
11739   def CheckPrereq(self):
11740     """Check prerequisites.
11741
11742     This only checks the optional instance list against the existing names.
11743
11744     """
11745     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11746     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11747     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11748
11749     if self.wanted_names is None:
11750       assert self.op.use_locking, "Locking was not used"
11751       self.wanted_names = owned_instances
11752
11753     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11754
11755     if self.op.use_locking:
11756       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11757                                 None)
11758     else:
11759       assert not (owned_instances or owned_groups or owned_nodes)
11760
11761     self.wanted_instances = instances.values()
11762
11763   def _ComputeBlockdevStatus(self, node, instance, dev):
11764     """Returns the status of a block device
11765
11766     """
11767     if self.op.static or not node:
11768       return None
11769
11770     self.cfg.SetDiskID(dev, node)
11771
11772     result = self.rpc.call_blockdev_find(node, dev)
11773     if result.offline:
11774       return None
11775
11776     result.Raise("Can't compute disk status for %s" % instance.name)
11777
11778     status = result.payload
11779     if status is None:
11780       return None
11781
11782     return (status.dev_path, status.major, status.minor,
11783             status.sync_percent, status.estimated_time,
11784             status.is_degraded, status.ldisk_status)
11785
11786   def _ComputeDiskStatus(self, instance, snode, dev):
11787     """Compute block device status.
11788
11789     """
11790     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11791
11792     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11793
11794   def _ComputeDiskStatusInner(self, instance, snode, dev):
11795     """Compute block device status.
11796
11797     @attention: The device has to be annotated already.
11798
11799     """
11800     if dev.dev_type in constants.LDS_DRBD:
11801       # we change the snode then (otherwise we use the one passed in)
11802       if dev.logical_id[0] == instance.primary_node:
11803         snode = dev.logical_id[1]
11804       else:
11805         snode = dev.logical_id[0]
11806
11807     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11808                                               instance, dev)
11809     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11810
11811     if dev.children:
11812       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11813                                         instance, snode),
11814                          dev.children)
11815     else:
11816       dev_children = []
11817
11818     return {
11819       "iv_name": dev.iv_name,
11820       "dev_type": dev.dev_type,
11821       "logical_id": dev.logical_id,
11822       "physical_id": dev.physical_id,
11823       "pstatus": dev_pstatus,
11824       "sstatus": dev_sstatus,
11825       "children": dev_children,
11826       "mode": dev.mode,
11827       "size": dev.size,
11828       }
11829
11830   def Exec(self, feedback_fn):
11831     """Gather and return data"""
11832     result = {}
11833
11834     cluster = self.cfg.GetClusterInfo()
11835
11836     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11837     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11838
11839     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11840                                                  for node in nodes.values()))
11841
11842     group2name_fn = lambda uuid: groups[uuid].name
11843
11844     for instance in self.wanted_instances:
11845       pnode = nodes[instance.primary_node]
11846
11847       if self.op.static or pnode.offline:
11848         remote_state = None
11849         if pnode.offline:
11850           self.LogWarning("Primary node %s is marked offline, returning static"
11851                           " information only for instance %s" %
11852                           (pnode.name, instance.name))
11853       else:
11854         remote_info = self.rpc.call_instance_info(instance.primary_node,
11855                                                   instance.name,
11856                                                   instance.hypervisor)
11857         remote_info.Raise("Error checking node %s" % instance.primary_node)
11858         remote_info = remote_info.payload
11859         if remote_info and "state" in remote_info:
11860           remote_state = "up"
11861         else:
11862           if instance.admin_state == constants.ADMINST_UP:
11863             remote_state = "down"
11864           else:
11865             remote_state = instance.admin_state
11866
11867       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11868                   instance.disks)
11869
11870       snodes_group_uuids = [nodes[snode_name].group
11871                             for snode_name in instance.secondary_nodes]
11872
11873       result[instance.name] = {
11874         "name": instance.name,
11875         "config_state": instance.admin_state,
11876         "run_state": remote_state,
11877         "pnode": instance.primary_node,
11878         "pnode_group_uuid": pnode.group,
11879         "pnode_group_name": group2name_fn(pnode.group),
11880         "snodes": instance.secondary_nodes,
11881         "snodes_group_uuids": snodes_group_uuids,
11882         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11883         "os": instance.os,
11884         # this happens to be the same format used for hooks
11885         "nics": _NICListToTuple(self, instance.nics),
11886         "disk_template": instance.disk_template,
11887         "disks": disks,
11888         "hypervisor": instance.hypervisor,
11889         "network_port": instance.network_port,
11890         "hv_instance": instance.hvparams,
11891         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11892         "be_instance": instance.beparams,
11893         "be_actual": cluster.FillBE(instance),
11894         "os_instance": instance.osparams,
11895         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11896         "serial_no": instance.serial_no,
11897         "mtime": instance.mtime,
11898         "ctime": instance.ctime,
11899         "uuid": instance.uuid,
11900         }
11901
11902     return result
11903
11904
11905 def PrepareContainerMods(mods, private_fn):
11906   """Prepares a list of container modifications by adding a private data field.
11907
11908   @type mods: list of tuples; (operation, index, parameters)
11909   @param mods: List of modifications
11910   @type private_fn: callable or None
11911   @param private_fn: Callable for constructing a private data field for a
11912     modification
11913   @rtype: list
11914
11915   """
11916   if private_fn is None:
11917     fn = lambda: None
11918   else:
11919     fn = private_fn
11920
11921   return [(op, idx, params, fn()) for (op, idx, params) in mods]
11922
11923
11924 #: Type description for changes as returned by L{ApplyContainerMods}'s
11925 #: callbacks
11926 _TApplyContModsCbChanges = \
11927   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11928     ht.TNonEmptyString,
11929     ht.TAny,
11930     ])))
11931
11932
11933 def ApplyContainerMods(kind, container, chgdesc, mods,
11934                        create_fn, modify_fn, remove_fn):
11935   """Applies descriptions in C{mods} to C{container}.
11936
11937   @type kind: string
11938   @param kind: One-word item description
11939   @type container: list
11940   @param container: Container to modify
11941   @type chgdesc: None or list
11942   @param chgdesc: List of applied changes
11943   @type mods: list
11944   @param mods: Modifications as returned by L{PrepareContainerMods}
11945   @type create_fn: callable
11946   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11947     receives absolute item index, parameters and private data object as added
11948     by L{PrepareContainerMods}, returns tuple containing new item and changes
11949     as list
11950   @type modify_fn: callable
11951   @param modify_fn: Callback for modifying an existing item
11952     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11953     and private data object as added by L{PrepareContainerMods}, returns
11954     changes as list
11955   @type remove_fn: callable
11956   @param remove_fn: Callback on removing item; receives absolute item index,
11957     item and private data object as added by L{PrepareContainerMods}
11958
11959   """
11960   for (op, idx, params, private) in mods:
11961     if idx == -1:
11962       # Append
11963       absidx = len(container) - 1
11964     elif idx < 0:
11965       raise IndexError("Not accepting negative indices other than -1")
11966     elif idx > len(container):
11967       raise IndexError("Got %s index %s, but there are only %s" %
11968                        (kind, idx, len(container)))
11969     else:
11970       absidx = idx
11971
11972     changes = None
11973
11974     if op == constants.DDM_ADD:
11975       # Calculate where item will be added
11976       if idx == -1:
11977         addidx = len(container)
11978       else:
11979         addidx = idx
11980
11981       if create_fn is None:
11982         item = params
11983       else:
11984         (item, changes) = create_fn(addidx, params, private)
11985
11986       if idx == -1:
11987         container.append(item)
11988       else:
11989         assert idx >= 0
11990         assert idx <= len(container)
11991         # list.insert does so before the specified index
11992         container.insert(idx, item)
11993     else:
11994       # Retrieve existing item
11995       try:
11996         item = container[absidx]
11997       except IndexError:
11998         raise IndexError("Invalid %s index %s" % (kind, idx))
11999
12000       if op == constants.DDM_REMOVE:
12001         assert not params
12002
12003         if remove_fn is not None:
12004           remove_fn(absidx, item, private)
12005
12006         changes = [("%s/%s" % (kind, absidx), "remove")]
12007
12008         assert container[absidx] == item
12009         del container[absidx]
12010       elif op == constants.DDM_MODIFY:
12011         if modify_fn is not None:
12012           changes = modify_fn(absidx, item, params, private)
12013       else:
12014         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12015
12016     assert _TApplyContModsCbChanges(changes)
12017
12018     if not (chgdesc is None or changes is None):
12019       chgdesc.extend(changes)
12020
12021
12022 def _UpdateIvNames(base_index, disks):
12023   """Updates the C{iv_name} attribute of disks.
12024
12025   @type disks: list of L{objects.Disk}
12026
12027   """
12028   for (idx, disk) in enumerate(disks):
12029     disk.iv_name = "disk/%s" % (base_index + idx, )
12030
12031
12032 class _InstNicModPrivate:
12033   """Data structure for network interface modifications.
12034
12035   Used by L{LUInstanceSetParams}.
12036
12037   """
12038   def __init__(self):
12039     self.params = None
12040     self.filled = None
12041
12042
12043 class LUInstanceSetParams(LogicalUnit):
12044   """Modifies an instances's parameters.
12045
12046   """
12047   HPATH = "instance-modify"
12048   HTYPE = constants.HTYPE_INSTANCE
12049   REQ_BGL = False
12050
12051   @staticmethod
12052   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12053     assert ht.TList(mods)
12054     assert not mods or len(mods[0]) in (2, 3)
12055
12056     if mods and len(mods[0]) == 2:
12057       result = []
12058
12059       addremove = 0
12060       for op, params in mods:
12061         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12062           result.append((op, -1, params))
12063           addremove += 1
12064
12065           if addremove > 1:
12066             raise errors.OpPrereqError("Only one %s add or remove operation is"
12067                                        " supported at a time" % kind,
12068                                        errors.ECODE_INVAL)
12069         else:
12070           result.append((constants.DDM_MODIFY, op, params))
12071
12072       assert verify_fn(result)
12073     else:
12074       result = mods
12075
12076     return result
12077
12078   @staticmethod
12079   def _CheckMods(kind, mods, key_types, item_fn):
12080     """Ensures requested disk/NIC modifications are valid.
12081
12082     """
12083     for (op, _, params) in mods:
12084       assert ht.TDict(params)
12085
12086       utils.ForceDictType(params, key_types)
12087
12088       if op == constants.DDM_REMOVE:
12089         if params:
12090           raise errors.OpPrereqError("No settings should be passed when"
12091                                      " removing a %s" % kind,
12092                                      errors.ECODE_INVAL)
12093       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12094         item_fn(op, params)
12095       else:
12096         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12097
12098   @staticmethod
12099   def _VerifyDiskModification(op, params):
12100     """Verifies a disk modification.
12101
12102     """
12103     if op == constants.DDM_ADD:
12104       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12105       if mode not in constants.DISK_ACCESS_SET:
12106         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12107                                    errors.ECODE_INVAL)
12108
12109       size = params.get(constants.IDISK_SIZE, None)
12110       if size is None:
12111         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12112                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12113
12114       try:
12115         size = int(size)
12116       except (TypeError, ValueError), err:
12117         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12118                                    errors.ECODE_INVAL)
12119
12120       params[constants.IDISK_SIZE] = size
12121
12122     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12123       raise errors.OpPrereqError("Disk size change not possible, use"
12124                                  " grow-disk", errors.ECODE_INVAL)
12125
12126   @staticmethod
12127   def _VerifyNicModification(op, params):
12128     """Verifies a network interface modification.
12129
12130     """
12131     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12132       ip = params.get(constants.INIC_IP, None)
12133       if ip is None:
12134         pass
12135       elif ip.lower() == constants.VALUE_NONE:
12136         params[constants.INIC_IP] = None
12137       elif not netutils.IPAddress.IsValid(ip):
12138         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12139                                    errors.ECODE_INVAL)
12140
12141       bridge = params.get("bridge", None)
12142       link = params.get(constants.INIC_LINK, None)
12143       if bridge and link:
12144         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12145                                    " at the same time", errors.ECODE_INVAL)
12146       elif bridge and bridge.lower() == constants.VALUE_NONE:
12147         params["bridge"] = None
12148       elif link and link.lower() == constants.VALUE_NONE:
12149         params[constants.INIC_LINK] = None
12150
12151       if op == constants.DDM_ADD:
12152         macaddr = params.get(constants.INIC_MAC, None)
12153         if macaddr is None:
12154           params[constants.INIC_MAC] = constants.VALUE_AUTO
12155
12156       if constants.INIC_MAC in params:
12157         macaddr = params[constants.INIC_MAC]
12158         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12159           macaddr = utils.NormalizeAndValidateMac(macaddr)
12160
12161         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12162           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12163                                      " modifying an existing NIC",
12164                                      errors.ECODE_INVAL)
12165
12166   def CheckArguments(self):
12167     if not (self.op.nics or self.op.disks or self.op.disk_template or
12168             self.op.hvparams or self.op.beparams or self.op.os_name or
12169             self.op.offline is not None or self.op.runtime_mem):
12170       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12171
12172     if self.op.hvparams:
12173       _CheckGlobalHvParams(self.op.hvparams)
12174
12175     self.op.disks = \
12176       self._UpgradeDiskNicMods("disk", self.op.disks,
12177         opcodes.OpInstanceSetParams.TestDiskModifications)
12178     self.op.nics = \
12179       self._UpgradeDiskNicMods("NIC", self.op.nics,
12180         opcodes.OpInstanceSetParams.TestNicModifications)
12181
12182     # Check disk modifications
12183     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12184                     self._VerifyDiskModification)
12185
12186     if self.op.disks and self.op.disk_template is not None:
12187       raise errors.OpPrereqError("Disk template conversion and other disk"
12188                                  " changes not supported at the same time",
12189                                  errors.ECODE_INVAL)
12190
12191     if (self.op.disk_template and
12192         self.op.disk_template in constants.DTS_INT_MIRROR and
12193         self.op.remote_node is None):
12194       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12195                                  " one requires specifying a secondary node",
12196                                  errors.ECODE_INVAL)
12197
12198     # Check NIC modifications
12199     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12200                     self._VerifyNicModification)
12201
12202   def ExpandNames(self):
12203     self._ExpandAndLockInstance()
12204     # Can't even acquire node locks in shared mode as upcoming changes in
12205     # Ganeti 2.6 will start to modify the node object on disk conversion
12206     self.needed_locks[locking.LEVEL_NODE] = []
12207     self.needed_locks[locking.LEVEL_NODE_RES] = []
12208     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12209
12210   def DeclareLocks(self, level):
12211     # TODO: Acquire group lock in shared mode (disk parameters)
12212     if level == locking.LEVEL_NODE:
12213       self._LockInstancesNodes()
12214       if self.op.disk_template and self.op.remote_node:
12215         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12216         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12217     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12218       # Copy node locks
12219       self.needed_locks[locking.LEVEL_NODE_RES] = \
12220         self.needed_locks[locking.LEVEL_NODE][:]
12221
12222   def BuildHooksEnv(self):
12223     """Build hooks env.
12224
12225     This runs on the master, primary and secondaries.
12226
12227     """
12228     args = dict()
12229     if constants.BE_MINMEM in self.be_new:
12230       args["minmem"] = self.be_new[constants.BE_MINMEM]
12231     if constants.BE_MAXMEM in self.be_new:
12232       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12233     if constants.BE_VCPUS in self.be_new:
12234       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12235     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12236     # information at all.
12237
12238     if self._new_nics is not None:
12239       nics = []
12240
12241       for nic in self._new_nics:
12242         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12243         mode = nicparams[constants.NIC_MODE]
12244         link = nicparams[constants.NIC_LINK]
12245         nics.append((nic.ip, nic.mac, mode, link))
12246
12247       args["nics"] = nics
12248
12249     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12250     if self.op.disk_template:
12251       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12252     if self.op.runtime_mem:
12253       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12254
12255     return env
12256
12257   def BuildHooksNodes(self):
12258     """Build hooks nodes.
12259
12260     """
12261     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12262     return (nl, nl)
12263
12264   def _PrepareNicModification(self, params, private, old_ip, old_params,
12265                               cluster, pnode):
12266     update_params_dict = dict([(key, params[key])
12267                                for key in constants.NICS_PARAMETERS
12268                                if key in params])
12269
12270     if "bridge" in params:
12271       update_params_dict[constants.NIC_LINK] = params["bridge"]
12272
12273     new_params = _GetUpdatedParams(old_params, update_params_dict)
12274     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12275
12276     new_filled_params = cluster.SimpleFillNIC(new_params)
12277     objects.NIC.CheckParameterSyntax(new_filled_params)
12278
12279     new_mode = new_filled_params[constants.NIC_MODE]
12280     if new_mode == constants.NIC_MODE_BRIDGED:
12281       bridge = new_filled_params[constants.NIC_LINK]
12282       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12283       if msg:
12284         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12285         if self.op.force:
12286           self.warn.append(msg)
12287         else:
12288           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12289
12290     elif new_mode == constants.NIC_MODE_ROUTED:
12291       ip = params.get(constants.INIC_IP, old_ip)
12292       if ip is None:
12293         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12294                                    " on a routed NIC", errors.ECODE_INVAL)
12295
12296     if constants.INIC_MAC in params:
12297       mac = params[constants.INIC_MAC]
12298       if mac is None:
12299         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12300                                    errors.ECODE_INVAL)
12301       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12302         # otherwise generate the MAC address
12303         params[constants.INIC_MAC] = \
12304           self.cfg.GenerateMAC(self.proc.GetECId())
12305       else:
12306         # or validate/reserve the current one
12307         try:
12308           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12309         except errors.ReservationError:
12310           raise errors.OpPrereqError("MAC address '%s' already in use"
12311                                      " in cluster" % mac,
12312                                      errors.ECODE_NOTUNIQUE)
12313
12314     private.params = new_params
12315     private.filled = new_filled_params
12316
12317     return (None, None)
12318
12319   def CheckPrereq(self):
12320     """Check prerequisites.
12321
12322     This only checks the instance list against the existing names.
12323
12324     """
12325     # checking the new params on the primary/secondary nodes
12326
12327     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12328     cluster = self.cluster = self.cfg.GetClusterInfo()
12329     assert self.instance is not None, \
12330       "Cannot retrieve locked instance %s" % self.op.instance_name
12331     pnode = instance.primary_node
12332     nodelist = list(instance.all_nodes)
12333     pnode_info = self.cfg.GetNodeInfo(pnode)
12334     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12335
12336     # Prepare disk/NIC modifications
12337     self.diskmod = PrepareContainerMods(self.op.disks, None)
12338     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12339
12340     # OS change
12341     if self.op.os_name and not self.op.force:
12342       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12343                       self.op.force_variant)
12344       instance_os = self.op.os_name
12345     else:
12346       instance_os = instance.os
12347
12348     assert not (self.op.disk_template and self.op.disks), \
12349       "Can't modify disk template and apply disk changes at the same time"
12350
12351     if self.op.disk_template:
12352       if instance.disk_template == self.op.disk_template:
12353         raise errors.OpPrereqError("Instance already has disk template %s" %
12354                                    instance.disk_template, errors.ECODE_INVAL)
12355
12356       if (instance.disk_template,
12357           self.op.disk_template) not in self._DISK_CONVERSIONS:
12358         raise errors.OpPrereqError("Unsupported disk template conversion from"
12359                                    " %s to %s" % (instance.disk_template,
12360                                                   self.op.disk_template),
12361                                    errors.ECODE_INVAL)
12362       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12363                           msg="cannot change disk template")
12364       if self.op.disk_template in constants.DTS_INT_MIRROR:
12365         if self.op.remote_node == pnode:
12366           raise errors.OpPrereqError("Given new secondary node %s is the same"
12367                                      " as the primary node of the instance" %
12368                                      self.op.remote_node, errors.ECODE_STATE)
12369         _CheckNodeOnline(self, self.op.remote_node)
12370         _CheckNodeNotDrained(self, self.op.remote_node)
12371         # FIXME: here we assume that the old instance type is DT_PLAIN
12372         assert instance.disk_template == constants.DT_PLAIN
12373         disks = [{constants.IDISK_SIZE: d.size,
12374                   constants.IDISK_VG: d.logical_id[0]}
12375                  for d in instance.disks]
12376         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12377         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12378
12379         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12380         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12381         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12382         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12383                                 ignore=self.op.ignore_ipolicy)
12384         if pnode_info.group != snode_info.group:
12385           self.LogWarning("The primary and secondary nodes are in two"
12386                           " different node groups; the disk parameters"
12387                           " from the first disk's node group will be"
12388                           " used")
12389
12390     # hvparams processing
12391     if self.op.hvparams:
12392       hv_type = instance.hypervisor
12393       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12394       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12395       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12396
12397       # local check
12398       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12399       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12400       self.hv_proposed = self.hv_new = hv_new # the new actual values
12401       self.hv_inst = i_hvdict # the new dict (without defaults)
12402     else:
12403       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12404                                               instance.hvparams)
12405       self.hv_new = self.hv_inst = {}
12406
12407     # beparams processing
12408     if self.op.beparams:
12409       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12410                                    use_none=True)
12411       objects.UpgradeBeParams(i_bedict)
12412       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12413       be_new = cluster.SimpleFillBE(i_bedict)
12414       self.be_proposed = self.be_new = be_new # the new actual values
12415       self.be_inst = i_bedict # the new dict (without defaults)
12416     else:
12417       self.be_new = self.be_inst = {}
12418       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12419     be_old = cluster.FillBE(instance)
12420
12421     # CPU param validation -- checking every time a paramtere is
12422     # changed to cover all cases where either CPU mask or vcpus have
12423     # changed
12424     if (constants.BE_VCPUS in self.be_proposed and
12425         constants.HV_CPU_MASK in self.hv_proposed):
12426       cpu_list = \
12427         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12428       # Verify mask is consistent with number of vCPUs. Can skip this
12429       # test if only 1 entry in the CPU mask, which means same mask
12430       # is applied to all vCPUs.
12431       if (len(cpu_list) > 1 and
12432           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12433         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12434                                    " CPU mask [%s]" %
12435                                    (self.be_proposed[constants.BE_VCPUS],
12436                                     self.hv_proposed[constants.HV_CPU_MASK]),
12437                                    errors.ECODE_INVAL)
12438
12439       # Only perform this test if a new CPU mask is given
12440       if constants.HV_CPU_MASK in self.hv_new:
12441         # Calculate the largest CPU number requested
12442         max_requested_cpu = max(map(max, cpu_list))
12443         # Check that all of the instance's nodes have enough physical CPUs to
12444         # satisfy the requested CPU mask
12445         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12446                                 max_requested_cpu + 1, instance.hypervisor)
12447
12448     # osparams processing
12449     if self.op.osparams:
12450       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12451       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12452       self.os_inst = i_osdict # the new dict (without defaults)
12453     else:
12454       self.os_inst = {}
12455
12456     self.warn = []
12457
12458     #TODO(dynmem): do the appropriate check involving MINMEM
12459     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12460         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12461       mem_check_list = [pnode]
12462       if be_new[constants.BE_AUTO_BALANCE]:
12463         # either we changed auto_balance to yes or it was from before
12464         mem_check_list.extend(instance.secondary_nodes)
12465       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12466                                                   instance.hypervisor)
12467       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12468                                          [instance.hypervisor])
12469       pninfo = nodeinfo[pnode]
12470       msg = pninfo.fail_msg
12471       if msg:
12472         # Assume the primary node is unreachable and go ahead
12473         self.warn.append("Can't get info from primary node %s: %s" %
12474                          (pnode, msg))
12475       else:
12476         (_, _, (pnhvinfo, )) = pninfo.payload
12477         if not isinstance(pnhvinfo.get("memory_free", None), int):
12478           self.warn.append("Node data from primary node %s doesn't contain"
12479                            " free memory information" % pnode)
12480         elif instance_info.fail_msg:
12481           self.warn.append("Can't get instance runtime information: %s" %
12482                           instance_info.fail_msg)
12483         else:
12484           if instance_info.payload:
12485             current_mem = int(instance_info.payload["memory"])
12486           else:
12487             # Assume instance not running
12488             # (there is a slight race condition here, but it's not very
12489             # probable, and we have no other way to check)
12490             # TODO: Describe race condition
12491             current_mem = 0
12492           #TODO(dynmem): do the appropriate check involving MINMEM
12493           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12494                       pnhvinfo["memory_free"])
12495           if miss_mem > 0:
12496             raise errors.OpPrereqError("This change will prevent the instance"
12497                                        " from starting, due to %d MB of memory"
12498                                        " missing on its primary node" %
12499                                        miss_mem,
12500                                        errors.ECODE_NORES)
12501
12502       if be_new[constants.BE_AUTO_BALANCE]:
12503         for node, nres in nodeinfo.items():
12504           if node not in instance.secondary_nodes:
12505             continue
12506           nres.Raise("Can't get info from secondary node %s" % node,
12507                      prereq=True, ecode=errors.ECODE_STATE)
12508           (_, _, (nhvinfo, )) = nres.payload
12509           if not isinstance(nhvinfo.get("memory_free", None), int):
12510             raise errors.OpPrereqError("Secondary node %s didn't return free"
12511                                        " memory information" % node,
12512                                        errors.ECODE_STATE)
12513           #TODO(dynmem): do the appropriate check involving MINMEM
12514           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12515             raise errors.OpPrereqError("This change will prevent the instance"
12516                                        " from failover to its secondary node"
12517                                        " %s, due to not enough memory" % node,
12518                                        errors.ECODE_STATE)
12519
12520     if self.op.runtime_mem:
12521       remote_info = self.rpc.call_instance_info(instance.primary_node,
12522                                                 instance.name,
12523                                                 instance.hypervisor)
12524       remote_info.Raise("Error checking node %s" % instance.primary_node)
12525       if not remote_info.payload: # not running already
12526         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12527                                    errors.ECODE_STATE)
12528
12529       current_memory = remote_info.payload["memory"]
12530       if (not self.op.force and
12531            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12532             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12533         raise errors.OpPrereqError("Instance %s must have memory between %d"
12534                                    " and %d MB of memory unless --force is"
12535                                    " given" % (instance.name,
12536                                     self.be_proposed[constants.BE_MINMEM],
12537                                     self.be_proposed[constants.BE_MAXMEM]),
12538                                    errors.ECODE_INVAL)
12539
12540       if self.op.runtime_mem > current_memory:
12541         _CheckNodeFreeMemory(self, instance.primary_node,
12542                              "ballooning memory for instance %s" %
12543                              instance.name,
12544                              self.op.memory - current_memory,
12545                              instance.hypervisor)
12546
12547     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12548       raise errors.OpPrereqError("Disk operations not supported for"
12549                                  " diskless instances",
12550                                  errors.ECODE_INVAL)
12551
12552     def _PrepareNicCreate(_, params, private):
12553       return self._PrepareNicModification(params, private, None, {},
12554                                           cluster, pnode)
12555
12556     def _PrepareNicMod(_, nic, params, private):
12557       return self._PrepareNicModification(params, private, nic.ip,
12558                                           nic.nicparams, cluster, pnode)
12559
12560     # Verify NIC changes (operating on copy)
12561     nics = instance.nics[:]
12562     ApplyContainerMods("NIC", nics, None, self.nicmod,
12563                        _PrepareNicCreate, _PrepareNicMod, None)
12564     if len(nics) > constants.MAX_NICS:
12565       raise errors.OpPrereqError("Instance has too many network interfaces"
12566                                  " (%d), cannot add more" % constants.MAX_NICS,
12567                                  errors.ECODE_STATE)
12568
12569     # Verify disk changes (operating on a copy)
12570     disks = instance.disks[:]
12571     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12572     if len(disks) > constants.MAX_DISKS:
12573       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12574                                  " more" % constants.MAX_DISKS,
12575                                  errors.ECODE_STATE)
12576
12577     if self.op.offline is not None:
12578       if self.op.offline:
12579         msg = "can't change to offline"
12580       else:
12581         msg = "can't change to online"
12582       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12583
12584     # Pre-compute NIC changes (necessary to use result in hooks)
12585     self._nic_chgdesc = []
12586     if self.nicmod:
12587       # Operate on copies as this is still in prereq
12588       nics = [nic.Copy() for nic in instance.nics]
12589       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12590                          self._CreateNewNic, self._ApplyNicMods, None)
12591       self._new_nics = nics
12592     else:
12593       self._new_nics = None
12594
12595   def _ConvertPlainToDrbd(self, feedback_fn):
12596     """Converts an instance from plain to drbd.
12597
12598     """
12599     feedback_fn("Converting template to drbd")
12600     instance = self.instance
12601     pnode = instance.primary_node
12602     snode = self.op.remote_node
12603
12604     assert instance.disk_template == constants.DT_PLAIN
12605
12606     # create a fake disk info for _GenerateDiskTemplate
12607     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12608                   constants.IDISK_VG: d.logical_id[0]}
12609                  for d in instance.disks]
12610     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12611                                       instance.name, pnode, [snode],
12612                                       disk_info, None, None, 0, feedback_fn,
12613                                       self.diskparams)
12614     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12615                                         self.diskparams)
12616     info = _GetInstanceInfoText(instance)
12617     feedback_fn("Creating additional volumes...")
12618     # first, create the missing data and meta devices
12619     for disk in anno_disks:
12620       # unfortunately this is... not too nice
12621       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12622                             info, True)
12623       for child in disk.children:
12624         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12625     # at this stage, all new LVs have been created, we can rename the
12626     # old ones
12627     feedback_fn("Renaming original volumes...")
12628     rename_list = [(o, n.children[0].logical_id)
12629                    for (o, n) in zip(instance.disks, new_disks)]
12630     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12631     result.Raise("Failed to rename original LVs")
12632
12633     feedback_fn("Initializing DRBD devices...")
12634     # all child devices are in place, we can now create the DRBD devices
12635     for disk in anno_disks:
12636       for node in [pnode, snode]:
12637         f_create = node == pnode
12638         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12639
12640     # at this point, the instance has been modified
12641     instance.disk_template = constants.DT_DRBD8
12642     instance.disks = new_disks
12643     self.cfg.Update(instance, feedback_fn)
12644
12645     # Release node locks while waiting for sync
12646     _ReleaseLocks(self, locking.LEVEL_NODE)
12647
12648     # disks are created, waiting for sync
12649     disk_abort = not _WaitForSync(self, instance,
12650                                   oneshot=not self.op.wait_for_sync)
12651     if disk_abort:
12652       raise errors.OpExecError("There are some degraded disks for"
12653                                " this instance, please cleanup manually")
12654
12655     # Node resource locks will be released by caller
12656
12657   def _ConvertDrbdToPlain(self, feedback_fn):
12658     """Converts an instance from drbd to plain.
12659
12660     """
12661     instance = self.instance
12662
12663     assert len(instance.secondary_nodes) == 1
12664     assert instance.disk_template == constants.DT_DRBD8
12665
12666     pnode = instance.primary_node
12667     snode = instance.secondary_nodes[0]
12668     feedback_fn("Converting template to plain")
12669
12670     old_disks = instance.disks
12671     new_disks = [d.children[0] for d in old_disks]
12672
12673     # copy over size and mode
12674     for parent, child in zip(old_disks, new_disks):
12675       child.size = parent.size
12676       child.mode = parent.mode
12677
12678     # this is a DRBD disk, return its port to the pool
12679     # NOTE: this must be done right before the call to cfg.Update!
12680     for disk in old_disks:
12681       tcp_port = disk.logical_id[2]
12682       self.cfg.AddTcpUdpPort(tcp_port)
12683
12684     # update instance structure
12685     instance.disks = new_disks
12686     instance.disk_template = constants.DT_PLAIN
12687     self.cfg.Update(instance, feedback_fn)
12688
12689     # Release locks in case removing disks takes a while
12690     _ReleaseLocks(self, locking.LEVEL_NODE)
12691
12692     feedback_fn("Removing volumes on the secondary node...")
12693     for disk in old_disks:
12694       self.cfg.SetDiskID(disk, snode)
12695       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12696       if msg:
12697         self.LogWarning("Could not remove block device %s on node %s,"
12698                         " continuing anyway: %s", disk.iv_name, snode, msg)
12699
12700     feedback_fn("Removing unneeded volumes on the primary node...")
12701     for idx, disk in enumerate(old_disks):
12702       meta = disk.children[1]
12703       self.cfg.SetDiskID(meta, pnode)
12704       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12705       if msg:
12706         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12707                         " continuing anyway: %s", idx, pnode, msg)
12708
12709   def _CreateNewDisk(self, idx, params, _):
12710     """Creates a new disk.
12711
12712     """
12713     instance = self.instance
12714
12715     # add a new disk
12716     if instance.disk_template in constants.DTS_FILEBASED:
12717       (file_driver, file_path) = instance.disks[0].logical_id
12718       file_path = os.path.dirname(file_path)
12719     else:
12720       file_driver = file_path = None
12721
12722     disk = \
12723       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12724                             instance.primary_node, instance.secondary_nodes,
12725                             [params], file_path, file_driver, idx,
12726                             self.Log, self.diskparams)[0]
12727
12728     info = _GetInstanceInfoText(instance)
12729
12730     logging.info("Creating volume %s for instance %s",
12731                  disk.iv_name, instance.name)
12732     # Note: this needs to be kept in sync with _CreateDisks
12733     #HARDCODE
12734     for node in instance.all_nodes:
12735       f_create = (node == instance.primary_node)
12736       try:
12737         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12738       except errors.OpExecError, err:
12739         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12740                         disk.iv_name, disk, node, err)
12741
12742     return (disk, [
12743       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12744       ])
12745
12746   @staticmethod
12747   def _ModifyDisk(idx, disk, params, _):
12748     """Modifies a disk.
12749
12750     """
12751     disk.mode = params[constants.IDISK_MODE]
12752
12753     return [
12754       ("disk.mode/%d" % idx, disk.mode),
12755       ]
12756
12757   def _RemoveDisk(self, idx, root, _):
12758     """Removes a disk.
12759
12760     """
12761     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12762       self.cfg.SetDiskID(disk, node)
12763       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12764       if msg:
12765         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12766                         " continuing anyway", idx, node, msg)
12767
12768     # if this is a DRBD disk, return its port to the pool
12769     if root.dev_type in constants.LDS_DRBD:
12770       self.cfg.AddTcpUdpPort(root.logical_id[2])
12771
12772   @staticmethod
12773   def _CreateNewNic(idx, params, private):
12774     """Creates data structure for a new network interface.
12775
12776     """
12777     mac = params[constants.INIC_MAC]
12778     ip = params.get(constants.INIC_IP, None)
12779     nicparams = private.params
12780
12781     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12782       ("nic.%d" % idx,
12783        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12784        (mac, ip, private.filled[constants.NIC_MODE],
12785        private.filled[constants.NIC_LINK])),
12786       ])
12787
12788   @staticmethod
12789   def _ApplyNicMods(idx, nic, params, private):
12790     """Modifies a network interface.
12791
12792     """
12793     changes = []
12794
12795     for key in [constants.INIC_MAC, constants.INIC_IP]:
12796       if key in params:
12797         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12798         setattr(nic, key, params[key])
12799
12800     if private.params:
12801       nic.nicparams = private.params
12802
12803       for (key, val) in params.items():
12804         changes.append(("nic.%s/%d" % (key, idx), val))
12805
12806     return changes
12807
12808   def Exec(self, feedback_fn):
12809     """Modifies an instance.
12810
12811     All parameters take effect only at the next restart of the instance.
12812
12813     """
12814     # Process here the warnings from CheckPrereq, as we don't have a
12815     # feedback_fn there.
12816     # TODO: Replace with self.LogWarning
12817     for warn in self.warn:
12818       feedback_fn("WARNING: %s" % warn)
12819
12820     assert ((self.op.disk_template is None) ^
12821             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12822       "Not owning any node resource locks"
12823
12824     result = []
12825     instance = self.instance
12826
12827     # runtime memory
12828     if self.op.runtime_mem:
12829       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12830                                                      instance,
12831                                                      self.op.runtime_mem)
12832       rpcres.Raise("Cannot modify instance runtime memory")
12833       result.append(("runtime_memory", self.op.runtime_mem))
12834
12835     # Apply disk changes
12836     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12837                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12838     _UpdateIvNames(0, instance.disks)
12839
12840     if self.op.disk_template:
12841       if __debug__:
12842         check_nodes = set(instance.all_nodes)
12843         if self.op.remote_node:
12844           check_nodes.add(self.op.remote_node)
12845         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12846           owned = self.owned_locks(level)
12847           assert not (check_nodes - owned), \
12848             ("Not owning the correct locks, owning %r, expected at least %r" %
12849              (owned, check_nodes))
12850
12851       r_shut = _ShutdownInstanceDisks(self, instance)
12852       if not r_shut:
12853         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12854                                  " proceed with disk template conversion")
12855       mode = (instance.disk_template, self.op.disk_template)
12856       try:
12857         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12858       except:
12859         self.cfg.ReleaseDRBDMinors(instance.name)
12860         raise
12861       result.append(("disk_template", self.op.disk_template))
12862
12863       assert instance.disk_template == self.op.disk_template, \
12864         ("Expected disk template '%s', found '%s'" %
12865          (self.op.disk_template, instance.disk_template))
12866
12867     # Release node and resource locks if there are any (they might already have
12868     # been released during disk conversion)
12869     _ReleaseLocks(self, locking.LEVEL_NODE)
12870     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12871
12872     # Apply NIC changes
12873     if self._new_nics is not None:
12874       instance.nics = self._new_nics
12875       result.extend(self._nic_chgdesc)
12876
12877     # hvparams changes
12878     if self.op.hvparams:
12879       instance.hvparams = self.hv_inst
12880       for key, val in self.op.hvparams.iteritems():
12881         result.append(("hv/%s" % key, val))
12882
12883     # beparams changes
12884     if self.op.beparams:
12885       instance.beparams = self.be_inst
12886       for key, val in self.op.beparams.iteritems():
12887         result.append(("be/%s" % key, val))
12888
12889     # OS change
12890     if self.op.os_name:
12891       instance.os = self.op.os_name
12892
12893     # osparams changes
12894     if self.op.osparams:
12895       instance.osparams = self.os_inst
12896       for key, val in self.op.osparams.iteritems():
12897         result.append(("os/%s" % key, val))
12898
12899     if self.op.offline is None:
12900       # Ignore
12901       pass
12902     elif self.op.offline:
12903       # Mark instance as offline
12904       self.cfg.MarkInstanceOffline(instance.name)
12905       result.append(("admin_state", constants.ADMINST_OFFLINE))
12906     else:
12907       # Mark instance as online, but stopped
12908       self.cfg.MarkInstanceDown(instance.name)
12909       result.append(("admin_state", constants.ADMINST_DOWN))
12910
12911     self.cfg.Update(instance, feedback_fn)
12912
12913     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12914                 self.owned_locks(locking.LEVEL_NODE)), \
12915       "All node locks should have been released by now"
12916
12917     return result
12918
12919   _DISK_CONVERSIONS = {
12920     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12921     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12922     }
12923
12924
12925 class LUInstanceChangeGroup(LogicalUnit):
12926   HPATH = "instance-change-group"
12927   HTYPE = constants.HTYPE_INSTANCE
12928   REQ_BGL = False
12929
12930   def ExpandNames(self):
12931     self.share_locks = _ShareAll()
12932     self.needed_locks = {
12933       locking.LEVEL_NODEGROUP: [],
12934       locking.LEVEL_NODE: [],
12935       }
12936
12937     self._ExpandAndLockInstance()
12938
12939     if self.op.target_groups:
12940       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12941                                   self.op.target_groups)
12942     else:
12943       self.req_target_uuids = None
12944
12945     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12946
12947   def DeclareLocks(self, level):
12948     if level == locking.LEVEL_NODEGROUP:
12949       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12950
12951       if self.req_target_uuids:
12952         lock_groups = set(self.req_target_uuids)
12953
12954         # Lock all groups used by instance optimistically; this requires going
12955         # via the node before it's locked, requiring verification later on
12956         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12957         lock_groups.update(instance_groups)
12958       else:
12959         # No target groups, need to lock all of them
12960         lock_groups = locking.ALL_SET
12961
12962       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12963
12964     elif level == locking.LEVEL_NODE:
12965       if self.req_target_uuids:
12966         # Lock all nodes used by instances
12967         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12968         self._LockInstancesNodes()
12969
12970         # Lock all nodes in all potential target groups
12971         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12972                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12973         member_nodes = [node_name
12974                         for group in lock_groups
12975                         for node_name in self.cfg.GetNodeGroup(group).members]
12976         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12977       else:
12978         # Lock all nodes as all groups are potential targets
12979         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12980
12981   def CheckPrereq(self):
12982     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12983     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12984     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12985
12986     assert (self.req_target_uuids is None or
12987             owned_groups.issuperset(self.req_target_uuids))
12988     assert owned_instances == set([self.op.instance_name])
12989
12990     # Get instance information
12991     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12992
12993     # Check if node groups for locked instance are still correct
12994     assert owned_nodes.issuperset(self.instance.all_nodes), \
12995       ("Instance %s's nodes changed while we kept the lock" %
12996        self.op.instance_name)
12997
12998     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12999                                            owned_groups)
13000
13001     if self.req_target_uuids:
13002       # User requested specific target groups
13003       self.target_uuids = frozenset(self.req_target_uuids)
13004     else:
13005       # All groups except those used by the instance are potential targets
13006       self.target_uuids = owned_groups - inst_groups
13007
13008     conflicting_groups = self.target_uuids & inst_groups
13009     if conflicting_groups:
13010       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13011                                  " used by the instance '%s'" %
13012                                  (utils.CommaJoin(conflicting_groups),
13013                                   self.op.instance_name),
13014                                  errors.ECODE_INVAL)
13015
13016     if not self.target_uuids:
13017       raise errors.OpPrereqError("There are no possible target groups",
13018                                  errors.ECODE_INVAL)
13019
13020   def BuildHooksEnv(self):
13021     """Build hooks env.
13022
13023     """
13024     assert self.target_uuids
13025
13026     env = {
13027       "TARGET_GROUPS": " ".join(self.target_uuids),
13028       }
13029
13030     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13031
13032     return env
13033
13034   def BuildHooksNodes(self):
13035     """Build hooks nodes.
13036
13037     """
13038     mn = self.cfg.GetMasterNode()
13039     return ([mn], [mn])
13040
13041   def Exec(self, feedback_fn):
13042     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13043
13044     assert instances == [self.op.instance_name], "Instance not locked"
13045
13046     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13047                      instances=instances, target_groups=list(self.target_uuids))
13048
13049     ial.Run(self.op.iallocator)
13050
13051     if not ial.success:
13052       raise errors.OpPrereqError("Can't compute solution for changing group of"
13053                                  " instance '%s' using iallocator '%s': %s" %
13054                                  (self.op.instance_name, self.op.iallocator,
13055                                   ial.info),
13056                                  errors.ECODE_NORES)
13057
13058     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13059
13060     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13061                  " instance '%s'", len(jobs), self.op.instance_name)
13062
13063     return ResultWithJobs(jobs)
13064
13065
13066 class LUBackupQuery(NoHooksLU):
13067   """Query the exports list
13068
13069   """
13070   REQ_BGL = False
13071
13072   def CheckArguments(self):
13073     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13074                              ["node", "export"], self.op.use_locking)
13075
13076   def ExpandNames(self):
13077     self.expq.ExpandNames(self)
13078
13079   def DeclareLocks(self, level):
13080     self.expq.DeclareLocks(self, level)
13081
13082   def Exec(self, feedback_fn):
13083     result = {}
13084
13085     for (node, expname) in self.expq.OldStyleQuery(self):
13086       if expname is None:
13087         result[node] = False
13088       else:
13089         result.setdefault(node, []).append(expname)
13090
13091     return result
13092
13093
13094 class _ExportQuery(_QueryBase):
13095   FIELDS = query.EXPORT_FIELDS
13096
13097   #: The node name is not a unique key for this query
13098   SORT_FIELD = "node"
13099
13100   def ExpandNames(self, lu):
13101     lu.needed_locks = {}
13102
13103     # The following variables interact with _QueryBase._GetNames
13104     if self.names:
13105       self.wanted = _GetWantedNodes(lu, self.names)
13106     else:
13107       self.wanted = locking.ALL_SET
13108
13109     self.do_locking = self.use_locking
13110
13111     if self.do_locking:
13112       lu.share_locks = _ShareAll()
13113       lu.needed_locks = {
13114         locking.LEVEL_NODE: self.wanted,
13115         }
13116
13117   def DeclareLocks(self, lu, level):
13118     pass
13119
13120   def _GetQueryData(self, lu):
13121     """Computes the list of nodes and their attributes.
13122
13123     """
13124     # Locking is not used
13125     # TODO
13126     assert not (compat.any(lu.glm.is_owned(level)
13127                            for level in locking.LEVELS
13128                            if level != locking.LEVEL_CLUSTER) or
13129                 self.do_locking or self.use_locking)
13130
13131     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13132
13133     result = []
13134
13135     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13136       if nres.fail_msg:
13137         result.append((node, None))
13138       else:
13139         result.extend((node, expname) for expname in nres.payload)
13140
13141     return result
13142
13143
13144 class LUBackupPrepare(NoHooksLU):
13145   """Prepares an instance for an export and returns useful information.
13146
13147   """
13148   REQ_BGL = False
13149
13150   def ExpandNames(self):
13151     self._ExpandAndLockInstance()
13152
13153   def CheckPrereq(self):
13154     """Check prerequisites.
13155
13156     """
13157     instance_name = self.op.instance_name
13158
13159     self.instance = self.cfg.GetInstanceInfo(instance_name)
13160     assert self.instance is not None, \
13161           "Cannot retrieve locked instance %s" % self.op.instance_name
13162     _CheckNodeOnline(self, self.instance.primary_node)
13163
13164     self._cds = _GetClusterDomainSecret()
13165
13166   def Exec(self, feedback_fn):
13167     """Prepares an instance for an export.
13168
13169     """
13170     instance = self.instance
13171
13172     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13173       salt = utils.GenerateSecret(8)
13174
13175       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13176       result = self.rpc.call_x509_cert_create(instance.primary_node,
13177                                               constants.RIE_CERT_VALIDITY)
13178       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13179
13180       (name, cert_pem) = result.payload
13181
13182       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13183                                              cert_pem)
13184
13185       return {
13186         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13187         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13188                           salt),
13189         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13190         }
13191
13192     return None
13193
13194
13195 class LUBackupExport(LogicalUnit):
13196   """Export an instance to an image in the cluster.
13197
13198   """
13199   HPATH = "instance-export"
13200   HTYPE = constants.HTYPE_INSTANCE
13201   REQ_BGL = False
13202
13203   def CheckArguments(self):
13204     """Check the arguments.
13205
13206     """
13207     self.x509_key_name = self.op.x509_key_name
13208     self.dest_x509_ca_pem = self.op.destination_x509_ca
13209
13210     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13211       if not self.x509_key_name:
13212         raise errors.OpPrereqError("Missing X509 key name for encryption",
13213                                    errors.ECODE_INVAL)
13214
13215       if not self.dest_x509_ca_pem:
13216         raise errors.OpPrereqError("Missing destination X509 CA",
13217                                    errors.ECODE_INVAL)
13218
13219   def ExpandNames(self):
13220     self._ExpandAndLockInstance()
13221
13222     # Lock all nodes for local exports
13223     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13224       # FIXME: lock only instance primary and destination node
13225       #
13226       # Sad but true, for now we have do lock all nodes, as we don't know where
13227       # the previous export might be, and in this LU we search for it and
13228       # remove it from its current node. In the future we could fix this by:
13229       #  - making a tasklet to search (share-lock all), then create the
13230       #    new one, then one to remove, after
13231       #  - removing the removal operation altogether
13232       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13233
13234   def DeclareLocks(self, level):
13235     """Last minute lock declaration."""
13236     # All nodes are locked anyway, so nothing to do here.
13237
13238   def BuildHooksEnv(self):
13239     """Build hooks env.
13240
13241     This will run on the master, primary node and target node.
13242
13243     """
13244     env = {
13245       "EXPORT_MODE": self.op.mode,
13246       "EXPORT_NODE": self.op.target_node,
13247       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13248       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13249       # TODO: Generic function for boolean env variables
13250       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13251       }
13252
13253     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13254
13255     return env
13256
13257   def BuildHooksNodes(self):
13258     """Build hooks nodes.
13259
13260     """
13261     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13262
13263     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13264       nl.append(self.op.target_node)
13265
13266     return (nl, nl)
13267
13268   def CheckPrereq(self):
13269     """Check prerequisites.
13270
13271     This checks that the instance and node names are valid.
13272
13273     """
13274     instance_name = self.op.instance_name
13275
13276     self.instance = self.cfg.GetInstanceInfo(instance_name)
13277     assert self.instance is not None, \
13278           "Cannot retrieve locked instance %s" % self.op.instance_name
13279     _CheckNodeOnline(self, self.instance.primary_node)
13280
13281     if (self.op.remove_instance and
13282         self.instance.admin_state == constants.ADMINST_UP and
13283         not self.op.shutdown):
13284       raise errors.OpPrereqError("Can not remove instance without shutting it"
13285                                  " down before")
13286
13287     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13288       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13289       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13290       assert self.dst_node is not None
13291
13292       _CheckNodeOnline(self, self.dst_node.name)
13293       _CheckNodeNotDrained(self, self.dst_node.name)
13294
13295       self._cds = None
13296       self.dest_disk_info = None
13297       self.dest_x509_ca = None
13298
13299     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13300       self.dst_node = None
13301
13302       if len(self.op.target_node) != len(self.instance.disks):
13303         raise errors.OpPrereqError(("Received destination information for %s"
13304                                     " disks, but instance %s has %s disks") %
13305                                    (len(self.op.target_node), instance_name,
13306                                     len(self.instance.disks)),
13307                                    errors.ECODE_INVAL)
13308
13309       cds = _GetClusterDomainSecret()
13310
13311       # Check X509 key name
13312       try:
13313         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13314       except (TypeError, ValueError), err:
13315         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13316
13317       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13318         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13319                                    errors.ECODE_INVAL)
13320
13321       # Load and verify CA
13322       try:
13323         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13324       except OpenSSL.crypto.Error, err:
13325         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13326                                    (err, ), errors.ECODE_INVAL)
13327
13328       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13329       if errcode is not None:
13330         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13331                                    (msg, ), errors.ECODE_INVAL)
13332
13333       self.dest_x509_ca = cert
13334
13335       # Verify target information
13336       disk_info = []
13337       for idx, disk_data in enumerate(self.op.target_node):
13338         try:
13339           (host, port, magic) = \
13340             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13341         except errors.GenericError, err:
13342           raise errors.OpPrereqError("Target info for disk %s: %s" %
13343                                      (idx, err), errors.ECODE_INVAL)
13344
13345         disk_info.append((host, port, magic))
13346
13347       assert len(disk_info) == len(self.op.target_node)
13348       self.dest_disk_info = disk_info
13349
13350     else:
13351       raise errors.ProgrammerError("Unhandled export mode %r" %
13352                                    self.op.mode)
13353
13354     # instance disk type verification
13355     # TODO: Implement export support for file-based disks
13356     for disk in self.instance.disks:
13357       if disk.dev_type == constants.LD_FILE:
13358         raise errors.OpPrereqError("Export not supported for instances with"
13359                                    " file-based disks", errors.ECODE_INVAL)
13360
13361   def _CleanupExports(self, feedback_fn):
13362     """Removes exports of current instance from all other nodes.
13363
13364     If an instance in a cluster with nodes A..D was exported to node C, its
13365     exports will be removed from the nodes A, B and D.
13366
13367     """
13368     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13369
13370     nodelist = self.cfg.GetNodeList()
13371     nodelist.remove(self.dst_node.name)
13372
13373     # on one-node clusters nodelist will be empty after the removal
13374     # if we proceed the backup would be removed because OpBackupQuery
13375     # substitutes an empty list with the full cluster node list.
13376     iname = self.instance.name
13377     if nodelist:
13378       feedback_fn("Removing old exports for instance %s" % iname)
13379       exportlist = self.rpc.call_export_list(nodelist)
13380       for node in exportlist:
13381         if exportlist[node].fail_msg:
13382           continue
13383         if iname in exportlist[node].payload:
13384           msg = self.rpc.call_export_remove(node, iname).fail_msg
13385           if msg:
13386             self.LogWarning("Could not remove older export for instance %s"
13387                             " on node %s: %s", iname, node, msg)
13388
13389   def Exec(self, feedback_fn):
13390     """Export an instance to an image in the cluster.
13391
13392     """
13393     assert self.op.mode in constants.EXPORT_MODES
13394
13395     instance = self.instance
13396     src_node = instance.primary_node
13397
13398     if self.op.shutdown:
13399       # shutdown the instance, but not the disks
13400       feedback_fn("Shutting down instance %s" % instance.name)
13401       result = self.rpc.call_instance_shutdown(src_node, instance,
13402                                                self.op.shutdown_timeout)
13403       # TODO: Maybe ignore failures if ignore_remove_failures is set
13404       result.Raise("Could not shutdown instance %s on"
13405                    " node %s" % (instance.name, src_node))
13406
13407     # set the disks ID correctly since call_instance_start needs the
13408     # correct drbd minor to create the symlinks
13409     for disk in instance.disks:
13410       self.cfg.SetDiskID(disk, src_node)
13411
13412     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13413
13414     if activate_disks:
13415       # Activate the instance disks if we'exporting a stopped instance
13416       feedback_fn("Activating disks for %s" % instance.name)
13417       _StartInstanceDisks(self, instance, None)
13418
13419     try:
13420       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13421                                                      instance)
13422
13423       helper.CreateSnapshots()
13424       try:
13425         if (self.op.shutdown and
13426             instance.admin_state == constants.ADMINST_UP and
13427             not self.op.remove_instance):
13428           assert not activate_disks
13429           feedback_fn("Starting instance %s" % instance.name)
13430           result = self.rpc.call_instance_start(src_node,
13431                                                 (instance, None, None), False)
13432           msg = result.fail_msg
13433           if msg:
13434             feedback_fn("Failed to start instance: %s" % msg)
13435             _ShutdownInstanceDisks(self, instance)
13436             raise errors.OpExecError("Could not start instance: %s" % msg)
13437
13438         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13439           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13440         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13441           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13442           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13443
13444           (key_name, _, _) = self.x509_key_name
13445
13446           dest_ca_pem = \
13447             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13448                                             self.dest_x509_ca)
13449
13450           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13451                                                      key_name, dest_ca_pem,
13452                                                      timeouts)
13453       finally:
13454         helper.Cleanup()
13455
13456       # Check for backwards compatibility
13457       assert len(dresults) == len(instance.disks)
13458       assert compat.all(isinstance(i, bool) for i in dresults), \
13459              "Not all results are boolean: %r" % dresults
13460
13461     finally:
13462       if activate_disks:
13463         feedback_fn("Deactivating disks for %s" % instance.name)
13464         _ShutdownInstanceDisks(self, instance)
13465
13466     if not (compat.all(dresults) and fin_resu):
13467       failures = []
13468       if not fin_resu:
13469         failures.append("export finalization")
13470       if not compat.all(dresults):
13471         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13472                                if not dsk)
13473         failures.append("disk export: disk(s) %s" % fdsk)
13474
13475       raise errors.OpExecError("Export failed, errors in %s" %
13476                                utils.CommaJoin(failures))
13477
13478     # At this point, the export was successful, we can cleanup/finish
13479
13480     # Remove instance if requested
13481     if self.op.remove_instance:
13482       feedback_fn("Removing instance %s" % instance.name)
13483       _RemoveInstance(self, feedback_fn, instance,
13484                       self.op.ignore_remove_failures)
13485
13486     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13487       self._CleanupExports(feedback_fn)
13488
13489     return fin_resu, dresults
13490
13491
13492 class LUBackupRemove(NoHooksLU):
13493   """Remove exports related to the named instance.
13494
13495   """
13496   REQ_BGL = False
13497
13498   def ExpandNames(self):
13499     self.needed_locks = {}
13500     # We need all nodes to be locked in order for RemoveExport to work, but we
13501     # don't need to lock the instance itself, as nothing will happen to it (and
13502     # we can remove exports also for a removed instance)
13503     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13504
13505   def Exec(self, feedback_fn):
13506     """Remove any export.
13507
13508     """
13509     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13510     # If the instance was not found we'll try with the name that was passed in.
13511     # This will only work if it was an FQDN, though.
13512     fqdn_warn = False
13513     if not instance_name:
13514       fqdn_warn = True
13515       instance_name = self.op.instance_name
13516
13517     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13518     exportlist = self.rpc.call_export_list(locked_nodes)
13519     found = False
13520     for node in exportlist:
13521       msg = exportlist[node].fail_msg
13522       if msg:
13523         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13524         continue
13525       if instance_name in exportlist[node].payload:
13526         found = True
13527         result = self.rpc.call_export_remove(node, instance_name)
13528         msg = result.fail_msg
13529         if msg:
13530           logging.error("Could not remove export for instance %s"
13531                         " on node %s: %s", instance_name, node, msg)
13532
13533     if fqdn_warn and not found:
13534       feedback_fn("Export not found. If trying to remove an export belonging"
13535                   " to a deleted instance please use its Fully Qualified"
13536                   " Domain Name.")
13537
13538
13539 class LUGroupAdd(LogicalUnit):
13540   """Logical unit for creating node groups.
13541
13542   """
13543   HPATH = "group-add"
13544   HTYPE = constants.HTYPE_GROUP
13545   REQ_BGL = False
13546
13547   def ExpandNames(self):
13548     # We need the new group's UUID here so that we can create and acquire the
13549     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13550     # that it should not check whether the UUID exists in the configuration.
13551     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13552     self.needed_locks = {}
13553     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13554
13555   def CheckPrereq(self):
13556     """Check prerequisites.
13557
13558     This checks that the given group name is not an existing node group
13559     already.
13560
13561     """
13562     try:
13563       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13564     except errors.OpPrereqError:
13565       pass
13566     else:
13567       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13568                                  " node group (UUID: %s)" %
13569                                  (self.op.group_name, existing_uuid),
13570                                  errors.ECODE_EXISTS)
13571
13572     if self.op.ndparams:
13573       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13574
13575     if self.op.hv_state:
13576       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13577     else:
13578       self.new_hv_state = None
13579
13580     if self.op.disk_state:
13581       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13582     else:
13583       self.new_disk_state = None
13584
13585     if self.op.diskparams:
13586       for templ in constants.DISK_TEMPLATES:
13587         if templ in self.op.diskparams:
13588           utils.ForceDictType(self.op.diskparams[templ],
13589                               constants.DISK_DT_TYPES)
13590       self.new_diskparams = self.op.diskparams
13591     else:
13592       self.new_diskparams = {}
13593
13594     if self.op.ipolicy:
13595       cluster = self.cfg.GetClusterInfo()
13596       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13597       try:
13598         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13599       except errors.ConfigurationError, err:
13600         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13601                                    errors.ECODE_INVAL)
13602
13603   def BuildHooksEnv(self):
13604     """Build hooks env.
13605
13606     """
13607     return {
13608       "GROUP_NAME": self.op.group_name,
13609       }
13610
13611   def BuildHooksNodes(self):
13612     """Build hooks nodes.
13613
13614     """
13615     mn = self.cfg.GetMasterNode()
13616     return ([mn], [mn])
13617
13618   def Exec(self, feedback_fn):
13619     """Add the node group to the cluster.
13620
13621     """
13622     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13623                                   uuid=self.group_uuid,
13624                                   alloc_policy=self.op.alloc_policy,
13625                                   ndparams=self.op.ndparams,
13626                                   diskparams=self.new_diskparams,
13627                                   ipolicy=self.op.ipolicy,
13628                                   hv_state_static=self.new_hv_state,
13629                                   disk_state_static=self.new_disk_state)
13630
13631     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13632     del self.remove_locks[locking.LEVEL_NODEGROUP]
13633
13634
13635 class LUGroupAssignNodes(NoHooksLU):
13636   """Logical unit for assigning nodes to groups.
13637
13638   """
13639   REQ_BGL = False
13640
13641   def ExpandNames(self):
13642     # These raise errors.OpPrereqError on their own:
13643     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13644     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13645
13646     # We want to lock all the affected nodes and groups. We have readily
13647     # available the list of nodes, and the *destination* group. To gather the
13648     # list of "source" groups, we need to fetch node information later on.
13649     self.needed_locks = {
13650       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13651       locking.LEVEL_NODE: self.op.nodes,
13652       }
13653
13654   def DeclareLocks(self, level):
13655     if level == locking.LEVEL_NODEGROUP:
13656       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13657
13658       # Try to get all affected nodes' groups without having the group or node
13659       # lock yet. Needs verification later in the code flow.
13660       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13661
13662       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13663
13664   def CheckPrereq(self):
13665     """Check prerequisites.
13666
13667     """
13668     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13669     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13670             frozenset(self.op.nodes))
13671
13672     expected_locks = (set([self.group_uuid]) |
13673                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13674     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13675     if actual_locks != expected_locks:
13676       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13677                                " current groups are '%s', used to be '%s'" %
13678                                (utils.CommaJoin(expected_locks),
13679                                 utils.CommaJoin(actual_locks)))
13680
13681     self.node_data = self.cfg.GetAllNodesInfo()
13682     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13683     instance_data = self.cfg.GetAllInstancesInfo()
13684
13685     if self.group is None:
13686       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13687                                (self.op.group_name, self.group_uuid))
13688
13689     (new_splits, previous_splits) = \
13690       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13691                                              for node in self.op.nodes],
13692                                             self.node_data, instance_data)
13693
13694     if new_splits:
13695       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13696
13697       if not self.op.force:
13698         raise errors.OpExecError("The following instances get split by this"
13699                                  " change and --force was not given: %s" %
13700                                  fmt_new_splits)
13701       else:
13702         self.LogWarning("This operation will split the following instances: %s",
13703                         fmt_new_splits)
13704
13705         if previous_splits:
13706           self.LogWarning("In addition, these already-split instances continue"
13707                           " to be split across groups: %s",
13708                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13709
13710   def Exec(self, feedback_fn):
13711     """Assign nodes to a new group.
13712
13713     """
13714     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13715
13716     self.cfg.AssignGroupNodes(mods)
13717
13718   @staticmethod
13719   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13720     """Check for split instances after a node assignment.
13721
13722     This method considers a series of node assignments as an atomic operation,
13723     and returns information about split instances after applying the set of
13724     changes.
13725
13726     In particular, it returns information about newly split instances, and
13727     instances that were already split, and remain so after the change.
13728
13729     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13730     considered.
13731
13732     @type changes: list of (node_name, new_group_uuid) pairs.
13733     @param changes: list of node assignments to consider.
13734     @param node_data: a dict with data for all nodes
13735     @param instance_data: a dict with all instances to consider
13736     @rtype: a two-tuple
13737     @return: a list of instances that were previously okay and result split as a
13738       consequence of this change, and a list of instances that were previously
13739       split and this change does not fix.
13740
13741     """
13742     changed_nodes = dict((node, group) for node, group in changes
13743                          if node_data[node].group != group)
13744
13745     all_split_instances = set()
13746     previously_split_instances = set()
13747
13748     def InstanceNodes(instance):
13749       return [instance.primary_node] + list(instance.secondary_nodes)
13750
13751     for inst in instance_data.values():
13752       if inst.disk_template not in constants.DTS_INT_MIRROR:
13753         continue
13754
13755       instance_nodes = InstanceNodes(inst)
13756
13757       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13758         previously_split_instances.add(inst.name)
13759
13760       if len(set(changed_nodes.get(node, node_data[node].group)
13761                  for node in instance_nodes)) > 1:
13762         all_split_instances.add(inst.name)
13763
13764     return (list(all_split_instances - previously_split_instances),
13765             list(previously_split_instances & all_split_instances))
13766
13767
13768 class _GroupQuery(_QueryBase):
13769   FIELDS = query.GROUP_FIELDS
13770
13771   def ExpandNames(self, lu):
13772     lu.needed_locks = {}
13773
13774     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13775     self._cluster = lu.cfg.GetClusterInfo()
13776     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13777
13778     if not self.names:
13779       self.wanted = [name_to_uuid[name]
13780                      for name in utils.NiceSort(name_to_uuid.keys())]
13781     else:
13782       # Accept names to be either names or UUIDs.
13783       missing = []
13784       self.wanted = []
13785       all_uuid = frozenset(self._all_groups.keys())
13786
13787       for name in self.names:
13788         if name in all_uuid:
13789           self.wanted.append(name)
13790         elif name in name_to_uuid:
13791           self.wanted.append(name_to_uuid[name])
13792         else:
13793           missing.append(name)
13794
13795       if missing:
13796         raise errors.OpPrereqError("Some groups do not exist: %s" %
13797                                    utils.CommaJoin(missing),
13798                                    errors.ECODE_NOENT)
13799
13800   def DeclareLocks(self, lu, level):
13801     pass
13802
13803   def _GetQueryData(self, lu):
13804     """Computes the list of node groups and their attributes.
13805
13806     """
13807     do_nodes = query.GQ_NODE in self.requested_data
13808     do_instances = query.GQ_INST in self.requested_data
13809
13810     group_to_nodes = None
13811     group_to_instances = None
13812
13813     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13814     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13815     # latter GetAllInstancesInfo() is not enough, for we have to go through
13816     # instance->node. Hence, we will need to process nodes even if we only need
13817     # instance information.
13818     if do_nodes or do_instances:
13819       all_nodes = lu.cfg.GetAllNodesInfo()
13820       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13821       node_to_group = {}
13822
13823       for node in all_nodes.values():
13824         if node.group in group_to_nodes:
13825           group_to_nodes[node.group].append(node.name)
13826           node_to_group[node.name] = node.group
13827
13828       if do_instances:
13829         all_instances = lu.cfg.GetAllInstancesInfo()
13830         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13831
13832         for instance in all_instances.values():
13833           node = instance.primary_node
13834           if node in node_to_group:
13835             group_to_instances[node_to_group[node]].append(instance.name)
13836
13837         if not do_nodes:
13838           # Do not pass on node information if it was not requested.
13839           group_to_nodes = None
13840
13841     return query.GroupQueryData(self._cluster,
13842                                 [self._all_groups[uuid]
13843                                  for uuid in self.wanted],
13844                                 group_to_nodes, group_to_instances,
13845                                 query.GQ_DISKPARAMS in self.requested_data)
13846
13847
13848 class LUGroupQuery(NoHooksLU):
13849   """Logical unit for querying node groups.
13850
13851   """
13852   REQ_BGL = False
13853
13854   def CheckArguments(self):
13855     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13856                           self.op.output_fields, False)
13857
13858   def ExpandNames(self):
13859     self.gq.ExpandNames(self)
13860
13861   def DeclareLocks(self, level):
13862     self.gq.DeclareLocks(self, level)
13863
13864   def Exec(self, feedback_fn):
13865     return self.gq.OldStyleQuery(self)
13866
13867
13868 class LUGroupSetParams(LogicalUnit):
13869   """Modifies the parameters of a node group.
13870
13871   """
13872   HPATH = "group-modify"
13873   HTYPE = constants.HTYPE_GROUP
13874   REQ_BGL = False
13875
13876   def CheckArguments(self):
13877     all_changes = [
13878       self.op.ndparams,
13879       self.op.diskparams,
13880       self.op.alloc_policy,
13881       self.op.hv_state,
13882       self.op.disk_state,
13883       self.op.ipolicy,
13884       ]
13885
13886     if all_changes.count(None) == len(all_changes):
13887       raise errors.OpPrereqError("Please pass at least one modification",
13888                                  errors.ECODE_INVAL)
13889
13890   def ExpandNames(self):
13891     # This raises errors.OpPrereqError on its own:
13892     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13893
13894     self.needed_locks = {
13895       locking.LEVEL_INSTANCE: [],
13896       locking.LEVEL_NODEGROUP: [self.group_uuid],
13897       }
13898
13899     self.share_locks[locking.LEVEL_INSTANCE] = 1
13900
13901   def DeclareLocks(self, level):
13902     if level == locking.LEVEL_INSTANCE:
13903       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13904
13905       # Lock instances optimistically, needs verification once group lock has
13906       # been acquired
13907       self.needed_locks[locking.LEVEL_INSTANCE] = \
13908           self.cfg.GetNodeGroupInstances(self.group_uuid)
13909
13910   @staticmethod
13911   def _UpdateAndVerifyDiskParams(old, new):
13912     """Updates and verifies disk parameters.
13913
13914     """
13915     new_params = _GetUpdatedParams(old, new)
13916     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13917     return new_params
13918
13919   def CheckPrereq(self):
13920     """Check prerequisites.
13921
13922     """
13923     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13924
13925     # Check if locked instances are still correct
13926     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13927
13928     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13929     cluster = self.cfg.GetClusterInfo()
13930
13931     if self.group is None:
13932       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13933                                (self.op.group_name, self.group_uuid))
13934
13935     if self.op.ndparams:
13936       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13937       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13938       self.new_ndparams = new_ndparams
13939
13940     if self.op.diskparams:
13941       diskparams = self.group.diskparams
13942       uavdp = self._UpdateAndVerifyDiskParams
13943       # For each disktemplate subdict update and verify the values
13944       new_diskparams = dict((dt,
13945                              uavdp(diskparams.get(dt, {}),
13946                                    self.op.diskparams[dt]))
13947                             for dt in constants.DISK_TEMPLATES
13948                             if dt in self.op.diskparams)
13949       # As we've all subdicts of diskparams ready, lets merge the actual
13950       # dict with all updated subdicts
13951       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13952
13953     if self.op.hv_state:
13954       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13955                                                  self.group.hv_state_static)
13956
13957     if self.op.disk_state:
13958       self.new_disk_state = \
13959         _MergeAndVerifyDiskState(self.op.disk_state,
13960                                  self.group.disk_state_static)
13961
13962     if self.op.ipolicy:
13963       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13964                                             self.op.ipolicy,
13965                                             group_policy=True)
13966
13967       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13968       inst_filter = lambda inst: inst.name in owned_instances
13969       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13970       violations = \
13971           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13972                                                                self.group),
13973                                         new_ipolicy, instances)
13974
13975       if violations:
13976         self.LogWarning("After the ipolicy change the following instances"
13977                         " violate them: %s",
13978                         utils.CommaJoin(violations))
13979
13980   def BuildHooksEnv(self):
13981     """Build hooks env.
13982
13983     """
13984     return {
13985       "GROUP_NAME": self.op.group_name,
13986       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13987       }
13988
13989   def BuildHooksNodes(self):
13990     """Build hooks nodes.
13991
13992     """
13993     mn = self.cfg.GetMasterNode()
13994     return ([mn], [mn])
13995
13996   def Exec(self, feedback_fn):
13997     """Modifies the node group.
13998
13999     """
14000     result = []
14001
14002     if self.op.ndparams:
14003       self.group.ndparams = self.new_ndparams
14004       result.append(("ndparams", str(self.group.ndparams)))
14005
14006     if self.op.diskparams:
14007       self.group.diskparams = self.new_diskparams
14008       result.append(("diskparams", str(self.group.diskparams)))
14009
14010     if self.op.alloc_policy:
14011       self.group.alloc_policy = self.op.alloc_policy
14012
14013     if self.op.hv_state:
14014       self.group.hv_state_static = self.new_hv_state
14015
14016     if self.op.disk_state:
14017       self.group.disk_state_static = self.new_disk_state
14018
14019     if self.op.ipolicy:
14020       self.group.ipolicy = self.new_ipolicy
14021
14022     self.cfg.Update(self.group, feedback_fn)
14023     return result
14024
14025
14026 class LUGroupRemove(LogicalUnit):
14027   HPATH = "group-remove"
14028   HTYPE = constants.HTYPE_GROUP
14029   REQ_BGL = False
14030
14031   def ExpandNames(self):
14032     # This will raises errors.OpPrereqError on its own:
14033     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14034     self.needed_locks = {
14035       locking.LEVEL_NODEGROUP: [self.group_uuid],
14036       }
14037
14038   def CheckPrereq(self):
14039     """Check prerequisites.
14040
14041     This checks that the given group name exists as a node group, that is
14042     empty (i.e., contains no nodes), and that is not the last group of the
14043     cluster.
14044
14045     """
14046     # Verify that the group is empty.
14047     group_nodes = [node.name
14048                    for node in self.cfg.GetAllNodesInfo().values()
14049                    if node.group == self.group_uuid]
14050
14051     if group_nodes:
14052       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14053                                  " nodes: %s" %
14054                                  (self.op.group_name,
14055                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14056                                  errors.ECODE_STATE)
14057
14058     # Verify the cluster would not be left group-less.
14059     if len(self.cfg.GetNodeGroupList()) == 1:
14060       raise errors.OpPrereqError("Group '%s' is the only group,"
14061                                  " cannot be removed" %
14062                                  self.op.group_name,
14063                                  errors.ECODE_STATE)
14064
14065   def BuildHooksEnv(self):
14066     """Build hooks env.
14067
14068     """
14069     return {
14070       "GROUP_NAME": self.op.group_name,
14071       }
14072
14073   def BuildHooksNodes(self):
14074     """Build hooks nodes.
14075
14076     """
14077     mn = self.cfg.GetMasterNode()
14078     return ([mn], [mn])
14079
14080   def Exec(self, feedback_fn):
14081     """Remove the node group.
14082
14083     """
14084     try:
14085       self.cfg.RemoveNodeGroup(self.group_uuid)
14086     except errors.ConfigurationError:
14087       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14088                                (self.op.group_name, self.group_uuid))
14089
14090     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14091
14092
14093 class LUGroupRename(LogicalUnit):
14094   HPATH = "group-rename"
14095   HTYPE = constants.HTYPE_GROUP
14096   REQ_BGL = False
14097
14098   def ExpandNames(self):
14099     # This raises errors.OpPrereqError on its own:
14100     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14101
14102     self.needed_locks = {
14103       locking.LEVEL_NODEGROUP: [self.group_uuid],
14104       }
14105
14106   def CheckPrereq(self):
14107     """Check prerequisites.
14108
14109     Ensures requested new name is not yet used.
14110
14111     """
14112     try:
14113       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14114     except errors.OpPrereqError:
14115       pass
14116     else:
14117       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14118                                  " node group (UUID: %s)" %
14119                                  (self.op.new_name, new_name_uuid),
14120                                  errors.ECODE_EXISTS)
14121
14122   def BuildHooksEnv(self):
14123     """Build hooks env.
14124
14125     """
14126     return {
14127       "OLD_NAME": self.op.group_name,
14128       "NEW_NAME": self.op.new_name,
14129       }
14130
14131   def BuildHooksNodes(self):
14132     """Build hooks nodes.
14133
14134     """
14135     mn = self.cfg.GetMasterNode()
14136
14137     all_nodes = self.cfg.GetAllNodesInfo()
14138     all_nodes.pop(mn, None)
14139
14140     run_nodes = [mn]
14141     run_nodes.extend(node.name for node in all_nodes.values()
14142                      if node.group == self.group_uuid)
14143
14144     return (run_nodes, run_nodes)
14145
14146   def Exec(self, feedback_fn):
14147     """Rename the node group.
14148
14149     """
14150     group = self.cfg.GetNodeGroup(self.group_uuid)
14151
14152     if group is None:
14153       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14154                                (self.op.group_name, self.group_uuid))
14155
14156     group.name = self.op.new_name
14157     self.cfg.Update(group, feedback_fn)
14158
14159     return self.op.new_name
14160
14161
14162 class LUGroupEvacuate(LogicalUnit):
14163   HPATH = "group-evacuate"
14164   HTYPE = constants.HTYPE_GROUP
14165   REQ_BGL = False
14166
14167   def ExpandNames(self):
14168     # This raises errors.OpPrereqError on its own:
14169     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14170
14171     if self.op.target_groups:
14172       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14173                                   self.op.target_groups)
14174     else:
14175       self.req_target_uuids = []
14176
14177     if self.group_uuid in self.req_target_uuids:
14178       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14179                                  " as a target group (targets are %s)" %
14180                                  (self.group_uuid,
14181                                   utils.CommaJoin(self.req_target_uuids)),
14182                                  errors.ECODE_INVAL)
14183
14184     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14185
14186     self.share_locks = _ShareAll()
14187     self.needed_locks = {
14188       locking.LEVEL_INSTANCE: [],
14189       locking.LEVEL_NODEGROUP: [],
14190       locking.LEVEL_NODE: [],
14191       }
14192
14193   def DeclareLocks(self, level):
14194     if level == locking.LEVEL_INSTANCE:
14195       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14196
14197       # Lock instances optimistically, needs verification once node and group
14198       # locks have been acquired
14199       self.needed_locks[locking.LEVEL_INSTANCE] = \
14200         self.cfg.GetNodeGroupInstances(self.group_uuid)
14201
14202     elif level == locking.LEVEL_NODEGROUP:
14203       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14204
14205       if self.req_target_uuids:
14206         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14207
14208         # Lock all groups used by instances optimistically; this requires going
14209         # via the node before it's locked, requiring verification later on
14210         lock_groups.update(group_uuid
14211                            for instance_name in
14212                              self.owned_locks(locking.LEVEL_INSTANCE)
14213                            for group_uuid in
14214                              self.cfg.GetInstanceNodeGroups(instance_name))
14215       else:
14216         # No target groups, need to lock all of them
14217         lock_groups = locking.ALL_SET
14218
14219       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14220
14221     elif level == locking.LEVEL_NODE:
14222       # This will only lock the nodes in the group to be evacuated which
14223       # contain actual instances
14224       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14225       self._LockInstancesNodes()
14226
14227       # Lock all nodes in group to be evacuated and target groups
14228       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14229       assert self.group_uuid in owned_groups
14230       member_nodes = [node_name
14231                       for group in owned_groups
14232                       for node_name in self.cfg.GetNodeGroup(group).members]
14233       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14234
14235   def CheckPrereq(self):
14236     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14237     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14238     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14239
14240     assert owned_groups.issuperset(self.req_target_uuids)
14241     assert self.group_uuid in owned_groups
14242
14243     # Check if locked instances are still correct
14244     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14245
14246     # Get instance information
14247     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14248
14249     # Check if node groups for locked instances are still correct
14250     _CheckInstancesNodeGroups(self.cfg, self.instances,
14251                               owned_groups, owned_nodes, self.group_uuid)
14252
14253     if self.req_target_uuids:
14254       # User requested specific target groups
14255       self.target_uuids = self.req_target_uuids
14256     else:
14257       # All groups except the one to be evacuated are potential targets
14258       self.target_uuids = [group_uuid for group_uuid in owned_groups
14259                            if group_uuid != self.group_uuid]
14260
14261       if not self.target_uuids:
14262         raise errors.OpPrereqError("There are no possible target groups",
14263                                    errors.ECODE_INVAL)
14264
14265   def BuildHooksEnv(self):
14266     """Build hooks env.
14267
14268     """
14269     return {
14270       "GROUP_NAME": self.op.group_name,
14271       "TARGET_GROUPS": " ".join(self.target_uuids),
14272       }
14273
14274   def BuildHooksNodes(self):
14275     """Build hooks nodes.
14276
14277     """
14278     mn = self.cfg.GetMasterNode()
14279
14280     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14281
14282     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14283
14284     return (run_nodes, run_nodes)
14285
14286   def Exec(self, feedback_fn):
14287     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14288
14289     assert self.group_uuid not in self.target_uuids
14290
14291     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14292                      instances=instances, target_groups=self.target_uuids)
14293
14294     ial.Run(self.op.iallocator)
14295
14296     if not ial.success:
14297       raise errors.OpPrereqError("Can't compute group evacuation using"
14298                                  " iallocator '%s': %s" %
14299                                  (self.op.iallocator, ial.info),
14300                                  errors.ECODE_NORES)
14301
14302     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14303
14304     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14305                  len(jobs), self.op.group_name)
14306
14307     return ResultWithJobs(jobs)
14308
14309
14310 class TagsLU(NoHooksLU): # pylint: disable=W0223
14311   """Generic tags LU.
14312
14313   This is an abstract class which is the parent of all the other tags LUs.
14314
14315   """
14316   def ExpandNames(self):
14317     self.group_uuid = None
14318     self.needed_locks = {}
14319
14320     if self.op.kind == constants.TAG_NODE:
14321       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14322       lock_level = locking.LEVEL_NODE
14323       lock_name = self.op.name
14324     elif self.op.kind == constants.TAG_INSTANCE:
14325       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14326       lock_level = locking.LEVEL_INSTANCE
14327       lock_name = self.op.name
14328     elif self.op.kind == constants.TAG_NODEGROUP:
14329       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14330       lock_level = locking.LEVEL_NODEGROUP
14331       lock_name = self.group_uuid
14332     else:
14333       lock_level = None
14334       lock_name = None
14335
14336     if lock_level and getattr(self.op, "use_locking", True):
14337       self.needed_locks[lock_level] = lock_name
14338
14339     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14340     # not possible to acquire the BGL based on opcode parameters)
14341
14342   def CheckPrereq(self):
14343     """Check prerequisites.
14344
14345     """
14346     if self.op.kind == constants.TAG_CLUSTER:
14347       self.target = self.cfg.GetClusterInfo()
14348     elif self.op.kind == constants.TAG_NODE:
14349       self.target = self.cfg.GetNodeInfo(self.op.name)
14350     elif self.op.kind == constants.TAG_INSTANCE:
14351       self.target = self.cfg.GetInstanceInfo(self.op.name)
14352     elif self.op.kind == constants.TAG_NODEGROUP:
14353       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14354     else:
14355       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14356                                  str(self.op.kind), errors.ECODE_INVAL)
14357
14358
14359 class LUTagsGet(TagsLU):
14360   """Returns the tags of a given object.
14361
14362   """
14363   REQ_BGL = False
14364
14365   def ExpandNames(self):
14366     TagsLU.ExpandNames(self)
14367
14368     # Share locks as this is only a read operation
14369     self.share_locks = _ShareAll()
14370
14371   def Exec(self, feedback_fn):
14372     """Returns the tag list.
14373
14374     """
14375     return list(self.target.GetTags())
14376
14377
14378 class LUTagsSearch(NoHooksLU):
14379   """Searches the tags for a given pattern.
14380
14381   """
14382   REQ_BGL = False
14383
14384   def ExpandNames(self):
14385     self.needed_locks = {}
14386
14387   def CheckPrereq(self):
14388     """Check prerequisites.
14389
14390     This checks the pattern passed for validity by compiling it.
14391
14392     """
14393     try:
14394       self.re = re.compile(self.op.pattern)
14395     except re.error, err:
14396       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14397                                  (self.op.pattern, err), errors.ECODE_INVAL)
14398
14399   def Exec(self, feedback_fn):
14400     """Returns the tag list.
14401
14402     """
14403     cfg = self.cfg
14404     tgts = [("/cluster", cfg.GetClusterInfo())]
14405     ilist = cfg.GetAllInstancesInfo().values()
14406     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14407     nlist = cfg.GetAllNodesInfo().values()
14408     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14409     tgts.extend(("/nodegroup/%s" % n.name, n)
14410                 for n in cfg.GetAllNodeGroupsInfo().values())
14411     results = []
14412     for path, target in tgts:
14413       for tag in target.GetTags():
14414         if self.re.search(tag):
14415           results.append((path, tag))
14416     return results
14417
14418
14419 class LUTagsSet(TagsLU):
14420   """Sets a tag on a given object.
14421
14422   """
14423   REQ_BGL = False
14424
14425   def CheckPrereq(self):
14426     """Check prerequisites.
14427
14428     This checks the type and length of the tag name and value.
14429
14430     """
14431     TagsLU.CheckPrereq(self)
14432     for tag in self.op.tags:
14433       objects.TaggableObject.ValidateTag(tag)
14434
14435   def Exec(self, feedback_fn):
14436     """Sets the tag.
14437
14438     """
14439     try:
14440       for tag in self.op.tags:
14441         self.target.AddTag(tag)
14442     except errors.TagError, err:
14443       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14444     self.cfg.Update(self.target, feedback_fn)
14445
14446
14447 class LUTagsDel(TagsLU):
14448   """Delete a list of tags from a given object.
14449
14450   """
14451   REQ_BGL = False
14452
14453   def CheckPrereq(self):
14454     """Check prerequisites.
14455
14456     This checks that we have the given tag.
14457
14458     """
14459     TagsLU.CheckPrereq(self)
14460     for tag in self.op.tags:
14461       objects.TaggableObject.ValidateTag(tag)
14462     del_tags = frozenset(self.op.tags)
14463     cur_tags = self.target.GetTags()
14464
14465     diff_tags = del_tags - cur_tags
14466     if diff_tags:
14467       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14468       raise errors.OpPrereqError("Tag(s) %s not found" %
14469                                  (utils.CommaJoin(diff_names), ),
14470                                  errors.ECODE_NOENT)
14471
14472   def Exec(self, feedback_fn):
14473     """Remove the tag from the object.
14474
14475     """
14476     for tag in self.op.tags:
14477       self.target.RemoveTag(tag)
14478     self.cfg.Update(self.target, feedback_fn)
14479
14480
14481 class LUTestDelay(NoHooksLU):
14482   """Sleep for a specified amount of time.
14483
14484   This LU sleeps on the master and/or nodes for a specified amount of
14485   time.
14486
14487   """
14488   REQ_BGL = False
14489
14490   def ExpandNames(self):
14491     """Expand names and set required locks.
14492
14493     This expands the node list, if any.
14494
14495     """
14496     self.needed_locks = {}
14497     if self.op.on_nodes:
14498       # _GetWantedNodes can be used here, but is not always appropriate to use
14499       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14500       # more information.
14501       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14502       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14503
14504   def _TestDelay(self):
14505     """Do the actual sleep.
14506
14507     """
14508     if self.op.on_master:
14509       if not utils.TestDelay(self.op.duration):
14510         raise errors.OpExecError("Error during master delay test")
14511     if self.op.on_nodes:
14512       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14513       for node, node_result in result.items():
14514         node_result.Raise("Failure during rpc call to node %s" % node)
14515
14516   def Exec(self, feedback_fn):
14517     """Execute the test delay opcode, with the wanted repetitions.
14518
14519     """
14520     if self.op.repeat == 0:
14521       self._TestDelay()
14522     else:
14523       top_value = self.op.repeat - 1
14524       for i in range(self.op.repeat):
14525         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14526         self._TestDelay()
14527
14528
14529 class LUTestJqueue(NoHooksLU):
14530   """Utility LU to test some aspects of the job queue.
14531
14532   """
14533   REQ_BGL = False
14534
14535   # Must be lower than default timeout for WaitForJobChange to see whether it
14536   # notices changed jobs
14537   _CLIENT_CONNECT_TIMEOUT = 20.0
14538   _CLIENT_CONFIRM_TIMEOUT = 60.0
14539
14540   @classmethod
14541   def _NotifyUsingSocket(cls, cb, errcls):
14542     """Opens a Unix socket and waits for another program to connect.
14543
14544     @type cb: callable
14545     @param cb: Callback to send socket name to client
14546     @type errcls: class
14547     @param errcls: Exception class to use for errors
14548
14549     """
14550     # Using a temporary directory as there's no easy way to create temporary
14551     # sockets without writing a custom loop around tempfile.mktemp and
14552     # socket.bind
14553     tmpdir = tempfile.mkdtemp()
14554     try:
14555       tmpsock = utils.PathJoin(tmpdir, "sock")
14556
14557       logging.debug("Creating temporary socket at %s", tmpsock)
14558       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14559       try:
14560         sock.bind(tmpsock)
14561         sock.listen(1)
14562
14563         # Send details to client
14564         cb(tmpsock)
14565
14566         # Wait for client to connect before continuing
14567         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14568         try:
14569           (conn, _) = sock.accept()
14570         except socket.error, err:
14571           raise errcls("Client didn't connect in time (%s)" % err)
14572       finally:
14573         sock.close()
14574     finally:
14575       # Remove as soon as client is connected
14576       shutil.rmtree(tmpdir)
14577
14578     # Wait for client to close
14579     try:
14580       try:
14581         # pylint: disable=E1101
14582         # Instance of '_socketobject' has no ... member
14583         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14584         conn.recv(1)
14585       except socket.error, err:
14586         raise errcls("Client failed to confirm notification (%s)" % err)
14587     finally:
14588       conn.close()
14589
14590   def _SendNotification(self, test, arg, sockname):
14591     """Sends a notification to the client.
14592
14593     @type test: string
14594     @param test: Test name
14595     @param arg: Test argument (depends on test)
14596     @type sockname: string
14597     @param sockname: Socket path
14598
14599     """
14600     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14601
14602   def _Notify(self, prereq, test, arg):
14603     """Notifies the client of a test.
14604
14605     @type prereq: bool
14606     @param prereq: Whether this is a prereq-phase test
14607     @type test: string
14608     @param test: Test name
14609     @param arg: Test argument (depends on test)
14610
14611     """
14612     if prereq:
14613       errcls = errors.OpPrereqError
14614     else:
14615       errcls = errors.OpExecError
14616
14617     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14618                                                   test, arg),
14619                                    errcls)
14620
14621   def CheckArguments(self):
14622     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14623     self.expandnames_calls = 0
14624
14625   def ExpandNames(self):
14626     checkargs_calls = getattr(self, "checkargs_calls", 0)
14627     if checkargs_calls < 1:
14628       raise errors.ProgrammerError("CheckArguments was not called")
14629
14630     self.expandnames_calls += 1
14631
14632     if self.op.notify_waitlock:
14633       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14634
14635     self.LogInfo("Expanding names")
14636
14637     # Get lock on master node (just to get a lock, not for a particular reason)
14638     self.needed_locks = {
14639       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14640       }
14641
14642   def Exec(self, feedback_fn):
14643     if self.expandnames_calls < 1:
14644       raise errors.ProgrammerError("ExpandNames was not called")
14645
14646     if self.op.notify_exec:
14647       self._Notify(False, constants.JQT_EXEC, None)
14648
14649     self.LogInfo("Executing")
14650
14651     if self.op.log_messages:
14652       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14653       for idx, msg in enumerate(self.op.log_messages):
14654         self.LogInfo("Sending log message %s", idx + 1)
14655         feedback_fn(constants.JQT_MSGPREFIX + msg)
14656         # Report how many test messages have been sent
14657         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14658
14659     if self.op.fail:
14660       raise errors.OpExecError("Opcode failure was requested")
14661
14662     return True
14663
14664
14665 class IAllocator(object):
14666   """IAllocator framework.
14667
14668   An IAllocator instance has three sets of attributes:
14669     - cfg that is needed to query the cluster
14670     - input data (all members of the _KEYS class attribute are required)
14671     - four buffer attributes (in|out_data|text), that represent the
14672       input (to the external script) in text and data structure format,
14673       and the output from it, again in two formats
14674     - the result variables from the script (success, info, nodes) for
14675       easy usage
14676
14677   """
14678   # pylint: disable=R0902
14679   # lots of instance attributes
14680
14681   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14682     self.cfg = cfg
14683     self.rpc = rpc_runner
14684     # init buffer variables
14685     self.in_text = self.out_text = self.in_data = self.out_data = None
14686     # init all input fields so that pylint is happy
14687     self.mode = mode
14688     self.memory = self.disks = self.disk_template = self.spindle_use = None
14689     self.os = self.tags = self.nics = self.vcpus = None
14690     self.hypervisor = None
14691     self.relocate_from = None
14692     self.name = None
14693     self.instances = None
14694     self.evac_mode = None
14695     self.target_groups = []
14696     # computed fields
14697     self.required_nodes = None
14698     # init result fields
14699     self.success = self.info = self.result = None
14700
14701     try:
14702       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14703     except KeyError:
14704       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14705                                    " IAllocator" % self.mode)
14706
14707     keyset = [n for (n, _) in keydata]
14708
14709     for key in kwargs:
14710       if key not in keyset:
14711         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14712                                      " IAllocator" % key)
14713       setattr(self, key, kwargs[key])
14714
14715     for key in keyset:
14716       if key not in kwargs:
14717         raise errors.ProgrammerError("Missing input parameter '%s' to"
14718                                      " IAllocator" % key)
14719     self._BuildInputData(compat.partial(fn, self), keydata)
14720
14721   def _ComputeClusterData(self):
14722     """Compute the generic allocator input data.
14723
14724     This is the data that is independent of the actual operation.
14725
14726     """
14727     cfg = self.cfg
14728     cluster_info = cfg.GetClusterInfo()
14729     # cluster data
14730     data = {
14731       "version": constants.IALLOCATOR_VERSION,
14732       "cluster_name": cfg.GetClusterName(),
14733       "cluster_tags": list(cluster_info.GetTags()),
14734       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14735       "ipolicy": cluster_info.ipolicy,
14736       }
14737     ninfo = cfg.GetAllNodesInfo()
14738     iinfo = cfg.GetAllInstancesInfo().values()
14739     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14740
14741     # node data
14742     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14743
14744     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14745       hypervisor_name = self.hypervisor
14746     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14747       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14748     else:
14749       hypervisor_name = cluster_info.primary_hypervisor
14750
14751     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14752                                         [hypervisor_name])
14753     node_iinfo = \
14754       self.rpc.call_all_instances_info(node_list,
14755                                        cluster_info.enabled_hypervisors)
14756
14757     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14758
14759     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14760     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14761                                                  i_list, config_ndata)
14762     assert len(data["nodes"]) == len(ninfo), \
14763         "Incomplete node data computed"
14764
14765     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14766
14767     self.in_data = data
14768
14769   @staticmethod
14770   def _ComputeNodeGroupData(cfg):
14771     """Compute node groups data.
14772
14773     """
14774     cluster = cfg.GetClusterInfo()
14775     ng = dict((guuid, {
14776       "name": gdata.name,
14777       "alloc_policy": gdata.alloc_policy,
14778       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14779       })
14780       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14781
14782     return ng
14783
14784   @staticmethod
14785   def _ComputeBasicNodeData(cfg, node_cfg):
14786     """Compute global node data.
14787
14788     @rtype: dict
14789     @returns: a dict of name: (node dict, node config)
14790
14791     """
14792     # fill in static (config-based) values
14793     node_results = dict((ninfo.name, {
14794       "tags": list(ninfo.GetTags()),
14795       "primary_ip": ninfo.primary_ip,
14796       "secondary_ip": ninfo.secondary_ip,
14797       "offline": ninfo.offline,
14798       "drained": ninfo.drained,
14799       "master_candidate": ninfo.master_candidate,
14800       "group": ninfo.group,
14801       "master_capable": ninfo.master_capable,
14802       "vm_capable": ninfo.vm_capable,
14803       "ndparams": cfg.GetNdParams(ninfo),
14804       })
14805       for ninfo in node_cfg.values())
14806
14807     return node_results
14808
14809   @staticmethod
14810   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14811                               node_results):
14812     """Compute global node data.
14813
14814     @param node_results: the basic node structures as filled from the config
14815
14816     """
14817     #TODO(dynmem): compute the right data on MAX and MIN memory
14818     # make a copy of the current dict
14819     node_results = dict(node_results)
14820     for nname, nresult in node_data.items():
14821       assert nname in node_results, "Missing basic data for node %s" % nname
14822       ninfo = node_cfg[nname]
14823
14824       if not (ninfo.offline or ninfo.drained):
14825         nresult.Raise("Can't get data for node %s" % nname)
14826         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14827                                 nname)
14828         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14829
14830         for attr in ["memory_total", "memory_free", "memory_dom0",
14831                      "vg_size", "vg_free", "cpu_total"]:
14832           if attr not in remote_info:
14833             raise errors.OpExecError("Node '%s' didn't return attribute"
14834                                      " '%s'" % (nname, attr))
14835           if not isinstance(remote_info[attr], int):
14836             raise errors.OpExecError("Node '%s' returned invalid value"
14837                                      " for '%s': %s" %
14838                                      (nname, attr, remote_info[attr]))
14839         # compute memory used by primary instances
14840         i_p_mem = i_p_up_mem = 0
14841         for iinfo, beinfo in i_list:
14842           if iinfo.primary_node == nname:
14843             i_p_mem += beinfo[constants.BE_MAXMEM]
14844             if iinfo.name not in node_iinfo[nname].payload:
14845               i_used_mem = 0
14846             else:
14847               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14848             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14849             remote_info["memory_free"] -= max(0, i_mem_diff)
14850
14851             if iinfo.admin_state == constants.ADMINST_UP:
14852               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14853
14854         # compute memory used by instances
14855         pnr_dyn = {
14856           "total_memory": remote_info["memory_total"],
14857           "reserved_memory": remote_info["memory_dom0"],
14858           "free_memory": remote_info["memory_free"],
14859           "total_disk": remote_info["vg_size"],
14860           "free_disk": remote_info["vg_free"],
14861           "total_cpus": remote_info["cpu_total"],
14862           "i_pri_memory": i_p_mem,
14863           "i_pri_up_memory": i_p_up_mem,
14864           }
14865         pnr_dyn.update(node_results[nname])
14866         node_results[nname] = pnr_dyn
14867
14868     return node_results
14869
14870   @staticmethod
14871   def _ComputeInstanceData(cluster_info, i_list):
14872     """Compute global instance data.
14873
14874     """
14875     instance_data = {}
14876     for iinfo, beinfo in i_list:
14877       nic_data = []
14878       for nic in iinfo.nics:
14879         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14880         nic_dict = {
14881           "mac": nic.mac,
14882           "ip": nic.ip,
14883           "mode": filled_params[constants.NIC_MODE],
14884           "link": filled_params[constants.NIC_LINK],
14885           }
14886         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14887           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14888         nic_data.append(nic_dict)
14889       pir = {
14890         "tags": list(iinfo.GetTags()),
14891         "admin_state": iinfo.admin_state,
14892         "vcpus": beinfo[constants.BE_VCPUS],
14893         "memory": beinfo[constants.BE_MAXMEM],
14894         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14895         "os": iinfo.os,
14896         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14897         "nics": nic_data,
14898         "disks": [{constants.IDISK_SIZE: dsk.size,
14899                    constants.IDISK_MODE: dsk.mode}
14900                   for dsk in iinfo.disks],
14901         "disk_template": iinfo.disk_template,
14902         "hypervisor": iinfo.hypervisor,
14903         }
14904       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14905                                                  pir["disks"])
14906       instance_data[iinfo.name] = pir
14907
14908     return instance_data
14909
14910   def _AddNewInstance(self):
14911     """Add new instance data to allocator structure.
14912
14913     This in combination with _AllocatorGetClusterData will create the
14914     correct structure needed as input for the allocator.
14915
14916     The checks for the completeness of the opcode must have already been
14917     done.
14918
14919     """
14920     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14921
14922     if self.disk_template in constants.DTS_INT_MIRROR:
14923       self.required_nodes = 2
14924     else:
14925       self.required_nodes = 1
14926
14927     request = {
14928       "name": self.name,
14929       "disk_template": self.disk_template,
14930       "tags": self.tags,
14931       "os": self.os,
14932       "vcpus": self.vcpus,
14933       "memory": self.memory,
14934       "spindle_use": self.spindle_use,
14935       "disks": self.disks,
14936       "disk_space_total": disk_space,
14937       "nics": self.nics,
14938       "required_nodes": self.required_nodes,
14939       "hypervisor": self.hypervisor,
14940       }
14941
14942     return request
14943
14944   def _AddRelocateInstance(self):
14945     """Add relocate instance data to allocator structure.
14946
14947     This in combination with _IAllocatorGetClusterData will create the
14948     correct structure needed as input for the allocator.
14949
14950     The checks for the completeness of the opcode must have already been
14951     done.
14952
14953     """
14954     instance = self.cfg.GetInstanceInfo(self.name)
14955     if instance is None:
14956       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14957                                    " IAllocator" % self.name)
14958
14959     if instance.disk_template not in constants.DTS_MIRRORED:
14960       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14961                                  errors.ECODE_INVAL)
14962
14963     if instance.disk_template in constants.DTS_INT_MIRROR and \
14964         len(instance.secondary_nodes) != 1:
14965       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14966                                  errors.ECODE_STATE)
14967
14968     self.required_nodes = 1
14969     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14970     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14971
14972     request = {
14973       "name": self.name,
14974       "disk_space_total": disk_space,
14975       "required_nodes": self.required_nodes,
14976       "relocate_from": self.relocate_from,
14977       }
14978     return request
14979
14980   def _AddNodeEvacuate(self):
14981     """Get data for node-evacuate requests.
14982
14983     """
14984     return {
14985       "instances": self.instances,
14986       "evac_mode": self.evac_mode,
14987       }
14988
14989   def _AddChangeGroup(self):
14990     """Get data for node-evacuate requests.
14991
14992     """
14993     return {
14994       "instances": self.instances,
14995       "target_groups": self.target_groups,
14996       }
14997
14998   def _BuildInputData(self, fn, keydata):
14999     """Build input data structures.
15000
15001     """
15002     self._ComputeClusterData()
15003
15004     request = fn()
15005     request["type"] = self.mode
15006     for keyname, keytype in keydata:
15007       if keyname not in request:
15008         raise errors.ProgrammerError("Request parameter %s is missing" %
15009                                      keyname)
15010       val = request[keyname]
15011       if not keytype(val):
15012         raise errors.ProgrammerError("Request parameter %s doesn't pass"
15013                                      " validation, value %s, expected"
15014                                      " type %s" % (keyname, val, keytype))
15015     self.in_data["request"] = request
15016
15017     self.in_text = serializer.Dump(self.in_data)
15018
15019   _STRING_LIST = ht.TListOf(ht.TString)
15020   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15021      # pylint: disable=E1101
15022      # Class '...' has no 'OP_ID' member
15023      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15024                           opcodes.OpInstanceMigrate.OP_ID,
15025                           opcodes.OpInstanceReplaceDisks.OP_ID])
15026      })))
15027
15028   _NEVAC_MOVED = \
15029     ht.TListOf(ht.TAnd(ht.TIsLength(3),
15030                        ht.TItems([ht.TNonEmptyString,
15031                                   ht.TNonEmptyString,
15032                                   ht.TListOf(ht.TNonEmptyString),
15033                                  ])))
15034   _NEVAC_FAILED = \
15035     ht.TListOf(ht.TAnd(ht.TIsLength(2),
15036                        ht.TItems([ht.TNonEmptyString,
15037                                   ht.TMaybeString,
15038                                  ])))
15039   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15040                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15041
15042   _MODE_DATA = {
15043     constants.IALLOCATOR_MODE_ALLOC:
15044       (_AddNewInstance,
15045        [
15046         ("name", ht.TString),
15047         ("memory", ht.TInt),
15048         ("spindle_use", ht.TInt),
15049         ("disks", ht.TListOf(ht.TDict)),
15050         ("disk_template", ht.TString),
15051         ("os", ht.TString),
15052         ("tags", _STRING_LIST),
15053         ("nics", ht.TListOf(ht.TDict)),
15054         ("vcpus", ht.TInt),
15055         ("hypervisor", ht.TString),
15056         ], ht.TList),
15057     constants.IALLOCATOR_MODE_RELOC:
15058       (_AddRelocateInstance,
15059        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15060        ht.TList),
15061      constants.IALLOCATOR_MODE_NODE_EVAC:
15062       (_AddNodeEvacuate, [
15063         ("instances", _STRING_LIST),
15064         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15065         ], _NEVAC_RESULT),
15066      constants.IALLOCATOR_MODE_CHG_GROUP:
15067       (_AddChangeGroup, [
15068         ("instances", _STRING_LIST),
15069         ("target_groups", _STRING_LIST),
15070         ], _NEVAC_RESULT),
15071     }
15072
15073   def Run(self, name, validate=True, call_fn=None):
15074     """Run an instance allocator and return the results.
15075
15076     """
15077     if call_fn is None:
15078       call_fn = self.rpc.call_iallocator_runner
15079
15080     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15081     result.Raise("Failure while running the iallocator script")
15082
15083     self.out_text = result.payload
15084     if validate:
15085       self._ValidateResult()
15086
15087   def _ValidateResult(self):
15088     """Process the allocator results.
15089
15090     This will process and if successful save the result in
15091     self.out_data and the other parameters.
15092
15093     """
15094     try:
15095       rdict = serializer.Load(self.out_text)
15096     except Exception, err:
15097       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15098
15099     if not isinstance(rdict, dict):
15100       raise errors.OpExecError("Can't parse iallocator results: not a dict")
15101
15102     # TODO: remove backwards compatiblity in later versions
15103     if "nodes" in rdict and "result" not in rdict:
15104       rdict["result"] = rdict["nodes"]
15105       del rdict["nodes"]
15106
15107     for key in "success", "info", "result":
15108       if key not in rdict:
15109         raise errors.OpExecError("Can't parse iallocator results:"
15110                                  " missing key '%s'" % key)
15111       setattr(self, key, rdict[key])
15112
15113     if not self._result_check(self.result):
15114       raise errors.OpExecError("Iallocator returned invalid result,"
15115                                " expected %s, got %s" %
15116                                (self._result_check, self.result),
15117                                errors.ECODE_INVAL)
15118
15119     if self.mode == constants.IALLOCATOR_MODE_RELOC:
15120       assert self.relocate_from is not None
15121       assert self.required_nodes == 1
15122
15123       node2group = dict((name, ndata["group"])
15124                         for (name, ndata) in self.in_data["nodes"].items())
15125
15126       fn = compat.partial(self._NodesToGroups, node2group,
15127                           self.in_data["nodegroups"])
15128
15129       instance = self.cfg.GetInstanceInfo(self.name)
15130       request_groups = fn(self.relocate_from + [instance.primary_node])
15131       result_groups = fn(rdict["result"] + [instance.primary_node])
15132
15133       if self.success and not set(result_groups).issubset(request_groups):
15134         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15135                                  " differ from original groups (%s)" %
15136                                  (utils.CommaJoin(result_groups),
15137                                   utils.CommaJoin(request_groups)))
15138
15139     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15140       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15141
15142     self.out_data = rdict
15143
15144   @staticmethod
15145   def _NodesToGroups(node2group, groups, nodes):
15146     """Returns a list of unique group names for a list of nodes.
15147
15148     @type node2group: dict
15149     @param node2group: Map from node name to group UUID
15150     @type groups: dict
15151     @param groups: Group information
15152     @type nodes: list
15153     @param nodes: Node names
15154
15155     """
15156     result = set()
15157
15158     for node in nodes:
15159       try:
15160         group_uuid = node2group[node]
15161       except KeyError:
15162         # Ignore unknown node
15163         pass
15164       else:
15165         try:
15166           group = groups[group_uuid]
15167         except KeyError:
15168           # Can't find group, let's use UUID
15169           group_name = group_uuid
15170         else:
15171           group_name = group["name"]
15172
15173         result.add(group_name)
15174
15175     return sorted(result)
15176
15177
15178 class LUTestAllocator(NoHooksLU):
15179   """Run allocator tests.
15180
15181   This LU runs the allocator tests
15182
15183   """
15184   def CheckPrereq(self):
15185     """Check prerequisites.
15186
15187     This checks the opcode parameters depending on the director and mode test.
15188
15189     """
15190     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15191       for attr in ["memory", "disks", "disk_template",
15192                    "os", "tags", "nics", "vcpus"]:
15193         if not hasattr(self.op, attr):
15194           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15195                                      attr, errors.ECODE_INVAL)
15196       iname = self.cfg.ExpandInstanceName(self.op.name)
15197       if iname is not None:
15198         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15199                                    iname, errors.ECODE_EXISTS)
15200       if not isinstance(self.op.nics, list):
15201         raise errors.OpPrereqError("Invalid parameter 'nics'",
15202                                    errors.ECODE_INVAL)
15203       if not isinstance(self.op.disks, list):
15204         raise errors.OpPrereqError("Invalid parameter 'disks'",
15205                                    errors.ECODE_INVAL)
15206       for row in self.op.disks:
15207         if (not isinstance(row, dict) or
15208             constants.IDISK_SIZE not in row or
15209             not isinstance(row[constants.IDISK_SIZE], int) or
15210             constants.IDISK_MODE not in row or
15211             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15212           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15213                                      " parameter", errors.ECODE_INVAL)
15214       if self.op.hypervisor is None:
15215         self.op.hypervisor = self.cfg.GetHypervisorType()
15216     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15217       fname = _ExpandInstanceName(self.cfg, self.op.name)
15218       self.op.name = fname
15219       self.relocate_from = \
15220           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15221     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15222                           constants.IALLOCATOR_MODE_NODE_EVAC):
15223       if not self.op.instances:
15224         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15225       self.op.instances = _GetWantedInstances(self, self.op.instances)
15226     else:
15227       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15228                                  self.op.mode, errors.ECODE_INVAL)
15229
15230     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15231       if self.op.allocator is None:
15232         raise errors.OpPrereqError("Missing allocator name",
15233                                    errors.ECODE_INVAL)
15234     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15235       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15236                                  self.op.direction, errors.ECODE_INVAL)
15237
15238   def Exec(self, feedback_fn):
15239     """Run the allocator test.
15240
15241     """
15242     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15243       ial = IAllocator(self.cfg, self.rpc,
15244                        mode=self.op.mode,
15245                        name=self.op.name,
15246                        memory=self.op.memory,
15247                        disks=self.op.disks,
15248                        disk_template=self.op.disk_template,
15249                        os=self.op.os,
15250                        tags=self.op.tags,
15251                        nics=self.op.nics,
15252                        vcpus=self.op.vcpus,
15253                        hypervisor=self.op.hypervisor,
15254                        )
15255     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15256       ial = IAllocator(self.cfg, self.rpc,
15257                        mode=self.op.mode,
15258                        name=self.op.name,
15259                        relocate_from=list(self.relocate_from),
15260                        )
15261     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15262       ial = IAllocator(self.cfg, self.rpc,
15263                        mode=self.op.mode,
15264                        instances=self.op.instances,
15265                        target_groups=self.op.target_groups)
15266     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15267       ial = IAllocator(self.cfg, self.rpc,
15268                        mode=self.op.mode,
15269                        instances=self.op.instances,
15270                        evac_mode=self.op.evac_mode)
15271     else:
15272       raise errors.ProgrammerError("Uncatched mode %s in"
15273                                    " LUTestAllocator.Exec", self.op.mode)
15274
15275     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15276       result = ial.in_text
15277     else:
15278       ial.Run(self.op.allocator, validate=False)
15279       result = ial.out_text
15280     return result
15281
15282
15283 #: Query type implementations
15284 _QUERY_IMPL = {
15285   constants.QR_CLUSTER: _ClusterQuery,
15286   constants.QR_INSTANCE: _InstanceQuery,
15287   constants.QR_NODE: _NodeQuery,
15288   constants.QR_GROUP: _GroupQuery,
15289   constants.QR_OS: _OsQuery,
15290   constants.QR_EXPORT: _ExportQuery,
15291   }
15292
15293 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15294
15295
15296 def _GetQueryImplementation(name):
15297   """Returns the implemtnation for a query type.
15298
15299   @param name: Query type, must be one of L{constants.QR_VIA_OP}
15300
15301   """
15302   try:
15303     return _QUERY_IMPL[name]
15304   except KeyError:
15305     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15306                                errors.ECODE_INVAL)