4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
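# Illustrative sketch, not part of the original module: an LU's Exec can hand
# follow-up work to the job queue by returning ResultWithJobs. The opcodes
# below only demonstrate the "list of lists" structure (each inner list
# becomes one job):
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()],
#             [opcodes.OpClusterVerifyGroup(group_name=group)
#              for group in self.cfg.GetNodeGroupList()]]
#     return ResultWithJobs(jobs)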
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need not worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
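# Hedged sketch, not from the original module: sharing locks as described in
# the docstring above. A hypothetical read-mostly LU could acquire all node
# locks in shared mode like this:
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1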
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If no nodes are to be returned, use
318 an empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
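# Usage sketch (assumed, not from the original module): an instance LU
# normally combines this helper with _LockInstancesNodes:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()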
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _CopyLockList(names):
707 """Makes a copy of a list of lock names.
709 Handles L{locking.ALL_SET} correctly.
712 if names == locking.ALL_SET:
713 return locking.ALL_SET
718 def _GetWantedNodes(lu, nodes):
719 """Returns list of checked and expanded node names.
721 @type lu: L{LogicalUnit}
722 @param lu: the logical unit on whose behalf we execute
724 @param nodes: list of node names or None for all nodes
726 @return: the list of nodes, sorted
727 @raise errors.ProgrammerError: if the nodes parameter is wrong type
731 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
733 return utils.NiceSort(lu.cfg.GetNodeList())
736 def _GetWantedInstances(lu, instances):
737 """Returns list of checked and expanded instance names.
739 @type lu: L{LogicalUnit}
740 @param lu: the logical unit on whose behalf we execute
741 @type instances: list
742 @param instances: list of instance names or None for all instances
744 @return: the list of instances, sorted
745 @raise errors.OpPrereqError: if the instances parameter is wrong type
746 @raise errors.OpPrereqError: if any of the passed instances is not found
750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
752 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
756 def _GetUpdatedParams(old_params, update_dict,
757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary.
760 @type old_params: dict
761 @param old_params: old parameters
762 @type update_dict: dict
763 @param update_dict: dict containing new parameter values, or
764 constants.VALUE_DEFAULT to reset the parameter to its default
766 @type use_default: boolean
767 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
768 values as 'to be deleted' values
769 @type use_none: boolean
770 @param use_none: whether to recognise C{None} values as 'to be
773 @return: the new parameter dictionary
776 params_copy = copy.deepcopy(old_params)
777 for key, val in update_dict.iteritems():
778 if ((use_default and val == constants.VALUE_DEFAULT) or
779 (use_none and val is None)):
785 params_copy[key] = val
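# Worked example with made-up values (not from the original module):
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   --> {"root_path": "/dev/vda1", "serial_console": True}
#
# With use_default=True the VALUE_DEFAULT marker removes "kernel_path" so it
# falls back to its default, while new keys are simply added.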
789 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy.
792 @param group_policy: whether this policy applies to a group and thus
793 we should support removal of policy entries
796 use_none = use_default = group_policy
797 ipolicy = copy.deepcopy(old_ipolicy)
798 for key, value in new_ipolicy.items():
799 if key not in constants.IPOLICY_ALL_KEYS:
800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
802 if key in constants.IPOLICY_ISPECS:
803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
806 use_default=use_default)
808 if (not value or value == [constants.VALUE_DEFAULT] or
809 value == constants.VALUE_DEFAULT):
813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
814 " on the cluster'" % key,
817 if key in constants.IPOLICY_PARAMETERS:
818 # FIXME: we assume all such values are float
820 ipolicy[key] = float(value)
821 except (TypeError, ValueError), err:
822 raise errors.OpPrereqError("Invalid value for attribute"
823 " '%s': '%s', error: %s" %
824 (key, value, err), errors.ECODE_INVAL)
826 # FIXME: we assume all others are lists; this should be redone
828 ipolicy[key] = list(value)
830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
831 except errors.ConfigurationError, err:
832 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
837 def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type.
840 @param base: The dict with the old data
841 @param updates: The dict with the new data
842 @param type_check: Dict suitable to ForceDictType to verify correct types
843 @returns: A new dict with updated and verified values
847 new = _GetUpdatedParams(old, value)
848 utils.ForceDictType(new, type_check)
851 ret = copy.deepcopy(base)
852 ret.update(dict((key, fn(base.get(key, {}), value))
853 for key, value in updates.items()))
857 def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object
860 @param op_input: The input dict from the opcode
861 @param obj_input: The input dict from the objects
862 @return: The verified and updated dict
866 invalid_hvs = set(op_input) - constants.HYPER_TYPES
868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
869 " %s" % utils.CommaJoin(invalid_hvs),
871 if obj_input is None:
873 type_check = constants.HVSTS_PARAMETER_TYPES
874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
879 def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object
882 @param op_input: The input dict from the opcode
883 @param obj_input: The input dict from the objects
884 @return: The verified and updated dict
887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
890 utils.CommaJoin(invalid_dst),
892 type_check = constants.DSS_PARAMETER_TYPES
893 if obj_input is None:
895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
897 for key, value in op_input.items())
902 def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU.
905 @type lu: L{LogicalUnit}
906 @param level: Lock level
907 @type names: list or None
908 @param names: Names of locks to release
909 @type keep: list or None
910 @param keep: Names of locks to retain
913 assert not (keep is not None and names is not None), \
914 "Only one of the 'names' and the 'keep' parameters can be given"
916 if names is not None:
917 should_release = names.__contains__
919 should_release = lambda name: name not in keep
921 should_release = None
923 owned = lu.owned_locks(level)
925 # Not owning any lock at this level, do nothing
932 # Determine which locks to release
934 if should_release(name):
939 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
941 # Release just some locks
942 lu.glm.release(level, names=release)
944 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
947 lu.glm.release(level)
949 assert not lu.glm.is_owned(level), "No locks should be owned"
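# Usage sketch (assumed, not from the original module): after an LU has
# narrowed its work down to a single node it can drop the remaining node
# locks while keeping the one it still needs:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release everything at that level:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)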
952 def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name.
955 @type instances: list of L{objects.Instance}
956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
959 return dict(((node, vol), inst.name)
960 for inst in instances
961 for (node, vols) in inst.MapLVsByNode().items()
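# Shape of the result, with made-up node, volume and instance names:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst2.example.com"}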
965 def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node.
969 hm = lu.proc.BuildHooksManager(lu)
971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
972 except Exception, err: # pylint: disable=W0703
973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
976 def _CheckOutputFields(static, dynamic, selected):
977 """Checks whether all selected fields are valid.
979 @type static: L{utils.FieldSet}
980 @param static: static fields set
981 @type dynamic: L{utils.FieldSet}
982 @param dynamic: dynamic fields set
989 delta = f.NonMatching(selected)
991 raise errors.OpPrereqError("Unknown output fields selected: %s"
992 % ",".join(delta), errors.ECODE_INVAL)
995 def _CheckGlobalHvParams(params):
996 """Validates that given hypervisor params are not global ones.
998 This will ensure that instances don't get customised versions of
1002 used_globals = constants.HVC_GLOBALS.intersection(params)
1004 msg = ("The following hypervisor parameters are global and cannot"
1005 " be customized at instance level, please modify them at"
1006 " cluster level: %s" % utils.CommaJoin(used_globals))
1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1010 def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online.
1013 @param lu: the LU on behalf of which we make the check
1014 @param node: the node to check
1015 @param msg: if passed, should be a message to replace the default one
1016 @raise errors.OpPrereqError: if the node is offline
1020 msg = "Can't use offline node"
1021 if lu.cfg.GetNodeInfo(node).offline:
1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1025 def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @raise errors.OpPrereqError: if the node is drained
1033 if lu.cfg.GetNodeInfo(node).drained:
1034 raise errors.OpPrereqError("Can't use drained node %s" % node,
1038 def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @raise errors.OpPrereqError: if the node is not vm capable
1046 if not lu.cfg.GetNodeInfo(node).vm_capable:
1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1051 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1052 """Ensure that a node supports a given OS.
1054 @param lu: the LU on behalf of which we make the check
1055 @param node: the node to check
1056 @param os_name: the OS to query about
1057 @param force_variant: whether to ignore variant errors
1058 @raise errors.OpPrereqError: if the node is not supporting the OS
1061 result = lu.rpc.call_os_get(node, os_name)
1062 result.Raise("OS '%s' not in supported OS list for node %s" %
1064 prereq=True, ecode=errors.ECODE_INVAL)
1065 if not force_variant:
1066 _CheckOSVariant(result.payload, os_name)
1069 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1070 """Ensure that a node has the given secondary ip.
1072 @type lu: L{LogicalUnit}
1073 @param lu: the LU on behalf of which we make the check
1075 @param node: the node to check
1076 @type secondary_ip: string
1077 @param secondary_ip: the ip to check
1078 @type prereq: boolean
1079 @param prereq: whether to throw a prerequisite or an execute error
1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1085 result.Raise("Failure checking secondary ip on node %s" % node,
1086 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1087 if not result.payload:
1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1089 " please fix and re-run this command" % secondary_ip)
1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1093 raise errors.OpExecError(msg)
1096 def _GetClusterDomainSecret():
1097 """Reads the cluster domain secret.
1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1104 def _CheckInstanceState(lu, instance, req_states, msg=None):
1105 """Ensure that an instance is in one of the required states.
1107 @param lu: the LU on behalf of which we make the check
1108 @param instance: the instance to check
1109 @param msg: if passed, should be a message to replace the default one
1110 @raise errors.OpPrereqError: if the instance is not in the required state
1114 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1115 if instance.admin_state not in req_states:
1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1117 (instance.name, instance.admin_state, msg),
1120 if constants.ADMINST_UP not in req_states:
1121 pnode = instance.primary_node
1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1123 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1124 prereq=True, ecode=errors.ECODE_ENVIRON)
1126 if instance.name in ins_l.payload:
1127 raise errors.OpPrereqError("Instance %s is running, %s" %
1128 (instance.name, msg), errors.ECODE_STATE)
1131 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1132 """Computes if value is in the desired range.
1134 @param name: name of the parameter for which we perform the check
1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1137 @param ipolicy: dictionary containing min, max and std values
1138 @param value: actual value that we want to use
1139 @return: None or element not meeting the criteria
1143 if value in [None, constants.VALUE_AUTO]:
1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1147 if value > max_v or min_v > value:
1149 fqn = "%s/%s" % (name, qualifier)
1152 return ("%s value %s is not in range [%s, %s]" %
1153 (fqn, value, min_v, max_v))
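# Hedged example (made-up numbers): checking a memory value against the
# policy's ISPECS_MIN/ISPECS_MAX bounds. The helper returns None when the
# value is inside the range, or a human-readable message otherwise:
#
#   err = _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 65536)
#   if err:
#     violations.append(err)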
1157 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1158 nic_count, disk_sizes, spindle_use,
1159 _compute_fn=_ComputeMinMaxSpec):
1160 """Verifies ipolicy against provided specs.
1163 @param ipolicy: The ipolicy
1165 @param mem_size: The memory size
1166 @type cpu_count: int
1167 @param cpu_count: Used cpu cores
1168 @type disk_count: int
1169 @param disk_count: Number of disks used
1170 @type nic_count: int
1171 @param nic_count: Number of nics used
1172 @type disk_sizes: list of ints
1173 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1174 @type spindle_use: int
1175 @param spindle_use: The number of spindles this instance uses
1176 @param _compute_fn: The compute function (unittest only)
1177 @return: A list of violations, or an empty list if no violations are found
1180 assert disk_count == len(disk_sizes)
1183 (constants.ISPEC_MEM_SIZE, "", mem_size),
1184 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1185 (constants.ISPEC_DISK_COUNT, "", disk_count),
1186 (constants.ISPEC_NIC_COUNT, "", nic_count),
1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1189 for idx, d in enumerate(disk_sizes)]
1192 (_compute_fn(name, qualifier, ipolicy, value)
1193 for (name, qualifier, value) in test_settings))
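# Call sketch with made-up values (not from the original module): verifying a
# hypothetical 2-vCPU, 1 GiB, two-disk instance spec against a policy; the
# result is an empty list when everything fits:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=1024,
#                                             cpu_count=2, disk_count=2,
#                                             nic_count=1,
#                                             disk_sizes=[10240, 20480],
#                                             spindle_use=1)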
1196 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1197 _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance: L{objects.Instance}
1203 @param instance: The instance to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1211 disk_count = len(instance.disks)
1212 disk_sizes = [disk.size for disk in instance.disks]
1213 nic_count = len(instance.nics)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
1219 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1220 _compute_fn=_ComputeIPolicySpecViolation):
1221 """Compute if instance specs meets the specs of ipolicy.
1224 @param ipolicy: The ipolicy to verify against
1225 @type instance_spec: dict
1226 @param instance_spec: The instance spec to verify
1227 @param _compute_fn: The function to verify ipolicy (unittest only)
1228 @see: L{_ComputeIPolicySpecViolation}
1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1239 disk_sizes, spindle_use)
1242 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1244 _compute_fn=_ComputeIPolicyInstanceViolation):
1245 """Compute if instance meets the specs of the new target group.
1247 @param ipolicy: The ipolicy to verify
1248 @param instance: The instance object to verify
1249 @param current_group: The current group of the instance
1250 @param target_group: The new group of the instance
1251 @param _compute_fn: The function to verify ipolicy (unittest only)
1252 @see: L{_ComputeIPolicySpecViolation}
1255 if current_group == target_group:
1258 return _compute_fn(ipolicy, instance)
1261 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1262 _compute_fn=_ComputeIPolicyNodeViolation):
1263 """Checks that the target node is correct in terms of instance policy.
1265 @param ipolicy: The ipolicy to verify
1266 @param instance: The instance object to verify
1267 @param node: The new node to relocate
1268 @param ignore: Ignore violations of the ipolicy
1269 @param _compute_fn: The function to verify ipolicy (unittest only)
1270 @see: L{_ComputeIPolicySpecViolation}
1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1277 msg = ("Instance does not meet target node group's (%s) instance"
1278 " policy: %s") % (node.group, utils.CommaJoin(res))
1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1285 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1286 """Computes a set of any instances that would violate the new ipolicy.
1288 @param old_ipolicy: The current (still in-place) ipolicy
1289 @param new_ipolicy: The new (to become) ipolicy
1290 @param instances: List of instances to verify
1291 @return: A list of instances which violate the new ipolicy but
1295 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1296 _ComputeViolatingInstances(old_ipolicy, instances))
1299 def _ExpandItemName(fn, name, kind):
1300 """Expand an item name.
1302 @param fn: the function to use for expansion
1303 @param name: requested item name
1304 @param kind: text description ('Node' or 'Instance')
1305 @return: the resolved (full) name
1306 @raise errors.OpPrereqError: if the item is not found
1309 full_name = fn(name)
1310 if full_name is None:
1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1316 def _ExpandNodeName(cfg, name):
1317 """Wrapper over L{_ExpandItemName} for nodes."""
1318 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1321 def _ExpandInstanceName(cfg, name):
1322 """Wrapper over L{_ExpandItemName} for instance."""
1323 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1326 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1327 minmem, maxmem, vcpus, nics, disk_template, disks,
1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks
1331 This builds the hook environment from individual variables.
1334 @param name: the name of the instance
1335 @type primary_node: string
1336 @param primary_node: the name of the instance's primary node
1337 @type secondary_nodes: list
1338 @param secondary_nodes: list of secondary nodes as strings
1339 @type os_type: string
1340 @param os_type: the name of the instance's OS
1341 @type status: string
1342 @param status: the desired status of the instance
1343 @type minmem: string
1344 @param minmem: the minimum memory size of the instance
1345 @type maxmem: string
1346 @param maxmem: the maximum memory size of the instance
1348 @param vcpus: the count of VCPUs the instance has
1350 @param nics: list of tuples (ip, mac, mode, link) representing
1351 the NICs the instance has
1352 @type disk_template: string
1353 @param disk_template: the disk template of the instance
1355 @param disks: the list of (size, mode) pairs
1357 @param bep: the backend parameters for the instance
1359 @param hvp: the hypervisor parameters for the instance
1360 @type hypervisor_name: string
1361 @param hypervisor_name: the hypervisor for the instance
1363 @param tags: list of instance tags as strings
1365 @return: the hook environment for this instance
1370 "INSTANCE_NAME": name,
1371 "INSTANCE_PRIMARY": primary_node,
1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1373 "INSTANCE_OS_TYPE": os_type,
1374 "INSTANCE_STATUS": status,
1375 "INSTANCE_MINMEM": minmem,
1376 "INSTANCE_MAXMEM": maxmem,
1377 # TODO(2.7) remove deprecated "memory" value
1378 "INSTANCE_MEMORY": maxmem,
1379 "INSTANCE_VCPUS": vcpus,
1380 "INSTANCE_DISK_TEMPLATE": disk_template,
1381 "INSTANCE_HYPERVISOR": hypervisor_name,
1384 nic_count = len(nics)
1385 for idx, (ip, mac, mode, link) in enumerate(nics):
1388 env["INSTANCE_NIC%d_IP" % idx] = ip
1389 env["INSTANCE_NIC%d_MAC" % idx] = mac
1390 env["INSTANCE_NIC%d_MODE" % idx] = mode
1391 env["INSTANCE_NIC%d_LINK" % idx] = link
1392 if mode == constants.NIC_MODE_BRIDGED:
1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1397 env["INSTANCE_NIC_COUNT"] = nic_count
1400 disk_count = len(disks)
1401 for idx, (size, mode) in enumerate(disks):
1402 env["INSTANCE_DISK%d_SIZE" % idx] = size
1403 env["INSTANCE_DISK%d_MODE" % idx] = mode
1407 env["INSTANCE_DISK_COUNT"] = disk_count
1412 env["INSTANCE_TAGS"] = " ".join(tags)
1414 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1415 for key, value in source.items():
1416 env["INSTANCE_%s_%s" % (kind, key)] = value
1421 def _NICListToTuple(lu, nics):
1422 """Build a list of nic information tuples.
1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1425 value in LUInstanceQueryData.
1427 @type lu: L{LogicalUnit}
1428 @param lu: the logical unit on whose behalf we execute
1429 @type nics: list of L{objects.NIC}
1430 @param nics: list of nics to convert to hooks tuples
1434 cluster = lu.cfg.GetClusterInfo()
1438 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1439 mode = filled_params[constants.NIC_MODE]
1440 link = filled_params[constants.NIC_LINK]
1441 hooks_nics.append((ip, mac, mode, link))
1445 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1446 """Builds instance related env variables for hooks from an object.
1448 @type lu: L{LogicalUnit}
1449 @param lu: the logical unit on whose behalf we execute
1450 @type instance: L{objects.Instance}
1451 @param instance: the instance for which we should build the
1453 @type override: dict
1454 @param override: dictionary with key/values that will override
1457 @return: the hook environment dictionary
1460 cluster = lu.cfg.GetClusterInfo()
1461 bep = cluster.FillBE(instance)
1462 hvp = cluster.FillHV(instance)
1464 "name": instance.name,
1465 "primary_node": instance.primary_node,
1466 "secondary_nodes": instance.secondary_nodes,
1467 "os_type": instance.os,
1468 "status": instance.admin_state,
1469 "maxmem": bep[constants.BE_MAXMEM],
1470 "minmem": bep[constants.BE_MINMEM],
1471 "vcpus": bep[constants.BE_VCPUS],
1472 "nics": _NICListToTuple(lu, instance.nics),
1473 "disk_template": instance.disk_template,
1474 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1477 "hypervisor_name": instance.hypervisor,
1478 "tags": instance.tags,
1481 args.update(override)
1482 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
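# Usage sketch (assumed): an instance LU's BuildHooksEnv usually delegates to
# this helper, optionally overriding single values through the override dict
# (keys match the _BuildInstanceHookEnv arguments, e.g. "status"):
#
#   def BuildHooksEnv(self):
#     return _BuildInstanceHookEnvByObject(self, self.instance,
#                                          override={"status": "down"})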
1485 def _AdjustCandidatePool(lu, exceptions):
1486 """Adjust the candidate pool after node operations.
1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1491 lu.LogInfo("Promoted nodes to master candidate role: %s",
1492 utils.CommaJoin(node.name for node in mod_list))
1493 for name in mod_list:
1494 lu.context.ReaddNode(name)
1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1501 def _DecideSelfPromotion(lu, exceptions=None):
1502 """Decide whether I should promote myself as a master candidate.
1505 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1506 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1507 # the new node will increase mc_max by one, so:
1508 mc_should = min(mc_should + 1, cp_size)
1509 return mc_now < mc_should
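# Worked example with made-up numbers: with candidate_pool_size = 10,
# mc_now = 4 and mc_should = 5, adding this node gives
# mc_should = min(5 + 1, 10) = 6, and since 4 < 6 the node promotes itself.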
1512 def _CalculateGroupIPolicy(cluster, group):
1513 """Calculate instance policy for group.
1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1519 def _ComputeViolatingInstances(ipolicy, instances):
1520 """Computes a set of instances who violates given ipolicy.
1522 @param ipolicy: The ipolicy to verify
1523 @type instances: iterable of L{objects.Instance}
1524 @param instances: List of instances to verify
1525 @return: A frozenset of instance names violating the ipolicy
1528 return frozenset([inst.name for inst in instances
1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1532 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1533 """Check that the brigdes needed by a list of nics exist.
1536 cluster = lu.cfg.GetClusterInfo()
1537 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1538 brlist = [params[constants.NIC_LINK] for params in paramslist
1539 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1541 result = lu.rpc.call_bridges_exist(target_node, brlist)
1542 result.Raise("Error checking bridges on destination node '%s'" %
1543 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1546 def _CheckInstanceBridgesExist(lu, instance, node=None):
1547 """Check that the brigdes needed by an instance exist.
1551 node = instance.primary_node
1552 _CheckNicsBridgesExist(lu, instance.nics, node)
1555 def _CheckOSVariant(os_obj, name):
1556 """Check whether an OS name conforms to the os variants specification.
1558 @type os_obj: L{objects.OS}
1559 @param os_obj: OS object to check
1561 @param name: OS name passed by the user, to check for validity
1564 variant = objects.OS.GetVariant(name)
1565 if not os_obj.supported_variants:
1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1568 " passed)" % (os_obj.name, variant),
1572 raise errors.OpPrereqError("OS name must include a variant",
1575 if variant not in os_obj.supported_variants:
1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1579 def _GetNodeInstancesInner(cfg, fn):
1580 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1583 def _GetNodeInstances(cfg, node_name):
1584 """Returns a list of all primary and secondary instances on a node.
1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1591 def _GetNodePrimaryInstances(cfg, node_name):
1592 """Returns primary instances on a node.
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name == inst.primary_node)
1599 def _GetNodeSecondaryInstances(cfg, node_name):
1600 """Returns secondary instances on a node.
1603 return _GetNodeInstancesInner(cfg,
1604 lambda inst: node_name in inst.secondary_nodes)
1607 def _GetStorageTypeArgs(cfg, storage_type):
1608 """Returns the arguments for a storage type.
1611 # Special case for file storage
1612 if storage_type == constants.ST_FILE:
1613 # storage.FileStorage wants a list of storage directories
1614 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1619 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1622 for dev in instance.disks:
1623 cfg.SetDiskID(dev, node_name)
1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1627 result.Raise("Failed to get disk status from node %s" % node_name,
1628 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1630 for idx, bdev_status in enumerate(result.payload):
1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1637 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1638 """Check the sanity of iallocator and node arguments and use the
1639 cluster-wide iallocator if appropriate.
1641 Check that at most one of (iallocator, node) is specified. If none is
1642 specified, then the LU's opcode's iallocator slot is filled with the
1643 cluster-wide default iallocator.
1645 @type iallocator_slot: string
1646 @param iallocator_slot: the name of the opcode iallocator slot
1647 @type node_slot: string
1648 @param node_slot: the name of the opcode target node slot
1651 node = getattr(lu.op, node_slot, None)
1652 iallocator = getattr(lu.op, iallocator_slot, None)
1654 if node is not None and iallocator is not None:
1655 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1657 elif node is None and iallocator is None:
1658 default_iallocator = lu.cfg.GetDefaultIAllocator()
1659 if default_iallocator:
1660 setattr(lu.op, iallocator_slot, default_iallocator)
1662 raise errors.OpPrereqError("No iallocator or node given and no"
1663 " cluster-wide default iallocator found;"
1664 " please specify either an iallocator or a"
1665 " node, or set a cluster-wide default"
1669 def _GetDefaultIAllocator(cfg, iallocator):
1670 """Decides on which iallocator to use.
1672 @type cfg: L{config.ConfigWriter}
1673 @param cfg: Cluster configuration object
1674 @type iallocator: string or None
1675 @param iallocator: Iallocator specified in opcode
1677 @return: Iallocator name
1681 # Use default iallocator
1682 iallocator = cfg.GetDefaultIAllocator()
1685 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1686 " opcode nor as a cluster-wide default",
1692 class LUClusterPostInit(LogicalUnit):
1693 """Logical unit for running hooks after cluster initialization.
1696 HPATH = "cluster-init"
1697 HTYPE = constants.HTYPE_CLUSTER
1699 def BuildHooksEnv(self):
1704 "OP_TARGET": self.cfg.GetClusterName(),
1707 def BuildHooksNodes(self):
1708 """Build hooks nodes.
1711 return ([], [self.cfg.GetMasterNode()])
1713 def Exec(self, feedback_fn):
1720 class LUClusterDestroy(LogicalUnit):
1721 """Logical unit for destroying the cluster.
1724 HPATH = "cluster-destroy"
1725 HTYPE = constants.HTYPE_CLUSTER
1727 def BuildHooksEnv(self):
1732 "OP_TARGET": self.cfg.GetClusterName(),
1735 def BuildHooksNodes(self):
1736 """Build hooks nodes.
1741 def CheckPrereq(self):
1742 """Check prerequisites.
1744 This checks whether the cluster is empty.
1746 Any errors are signaled by raising errors.OpPrereqError.
1749 master = self.cfg.GetMasterNode()
1751 nodelist = self.cfg.GetNodeList()
1752 if len(nodelist) != 1 or nodelist[0] != master:
1753 raise errors.OpPrereqError("There are still %d node(s) in"
1754 " this cluster." % (len(nodelist) - 1),
1756 instancelist = self.cfg.GetInstanceList()
1758 raise errors.OpPrereqError("There are still %d instance(s) in"
1759 " this cluster." % len(instancelist),
1762 def Exec(self, feedback_fn):
1763 """Destroys the cluster.
1766 master_params = self.cfg.GetMasterNetworkParameters()
1768 # Run post hooks on master node before it's removed
1769 _RunPostHook(self, master_params.name)
1771 ems = self.cfg.GetUseExternalMipScript()
1772 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1775 self.LogWarning("Error disabling the master IP address: %s",
1778 return master_params.name
1781 def _VerifyCertificate(filename):
1782 """Verifies a certificate for L{LUClusterVerifyConfig}.
1784 @type filename: string
1785 @param filename: Path to PEM file
1789 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1790 utils.ReadFile(filename))
1791 except Exception, err: # pylint: disable=W0703
1792 return (LUClusterVerifyConfig.ETYPE_ERROR,
1793 "Failed to load X509 certificate %s: %s" % (filename, err))
1796 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1797 constants.SSL_CERT_EXPIRATION_ERROR)
1800 fnamemsg = "While verifying %s: %s" % (filename, msg)
1805 return (None, fnamemsg)
1806 elif errcode == utils.CERT_WARNING:
1807 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1808 elif errcode == utils.CERT_ERROR:
1809 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1811 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1814 def _GetAllHypervisorParameters(cluster, instances):
1815 """Compute the set of all hypervisor parameters.
1817 @type cluster: L{objects.Cluster}
1818 @param cluster: the cluster object
1819 @type instances: list of L{objects.Instance}
1820 @param instances: additional instances from which to obtain parameters
1821 @rtype: list of (origin, hypervisor, parameters)
1822 @return: a list with all parameters found, indicating the hypervisor they
1823 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1828 for hv_name in cluster.enabled_hypervisors:
1829 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1831 for os_name, os_hvp in cluster.os_hvp.items():
1832 for hv_name, hv_params in os_hvp.items():
1834 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1835 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1837 # TODO: collapse identical parameter values in a single one
1838 for instance in instances:
1839 if instance.hvparams:
1840 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1841 cluster.FillHV(instance)))
1846 class _VerifyErrors(object):
1847 """Mix-in for cluster/group verify LUs.
1849 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1850 self.op and self._feedback_fn to be available.)
1854 ETYPE_FIELD = "code"
1855 ETYPE_ERROR = "ERROR"
1856 ETYPE_WARNING = "WARNING"
1858 def _Error(self, ecode, item, msg, *args, **kwargs):
1859 """Format an error message.
1861 Based on the opcode's error_codes parameter, either format a
1862 parseable error code, or a simpler error string.
1864 This must be called only from Exec and functions called from Exec.
1867 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1868 itype, etxt, _ = ecode
1869 # first complete the msg
1872 # then format the whole message
1873 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1874 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1880 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1881 # and finally report it via the feedback_fn
1882 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1884 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1885 """Log an error message if the passed condition is True.
1889 or self.op.debug_simulate_errors) # pylint: disable=E1101
1891 # If the error code is in the list of ignored errors, demote the error to a
1893 (_, etxt, _) = ecode
1894 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1895 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1898 self._Error(ecode, *args, **kwargs)
1900 # do not mark the operation as failed for WARN cases only
1901 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1902 self.bad = self.bad or cond
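# Usage sketch (assumed): verification code reports problems through this
# helper, e.g. for a cluster configuration message:
#
#   self._ErrorIf(bool(msg), constants.CV_ECLUSTERCFG, None,
#                 "configuration problem: %s", msg)
#
# Errors listed in the opcode's ignore_errors are demoted to warnings and do
# not mark the operation as failed.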
1905 class LUClusterVerify(NoHooksLU):
1906 """Submits all jobs necessary to verify the cluster.
1911 def ExpandNames(self):
1912 self.needed_locks = {}
1914 def Exec(self, feedback_fn):
1917 if self.op.group_name:
1918 groups = [self.op.group_name]
1919 depends_fn = lambda: None
1921 groups = self.cfg.GetNodeGroupList()
1923 # Verify global configuration
1925 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1928 # Always depend on global verification
1929 depends_fn = lambda: [(-len(jobs), [])]
1931 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1932 ignore_errors=self.op.ignore_errors,
1933 depends=depends_fn())]
1934 for group in groups)
1936 # Fix up all parameters
1937 for op in itertools.chain(*jobs): # pylint: disable=W0142
1938 op.debug_simulate_errors = self.op.debug_simulate_errors
1939 op.verbose = self.op.verbose
1940 op.error_codes = self.op.error_codes
1942 op.skip_checks = self.op.skip_checks
1943 except AttributeError:
1944 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1946 return ResultWithJobs(jobs)
1949 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1950 """Verifies the cluster config.
1955 def _VerifyHVP(self, hvp_data):
1956 """Verifies locally the syntax of the hypervisor parameters.
1959 for item, hv_name, hv_params in hvp_data:
1960 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1963 hv_class = hypervisor.GetHypervisor(hv_name)
1964 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1965 hv_class.CheckParameterSyntax(hv_params)
1966 except errors.GenericError, err:
1967 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1969 def ExpandNames(self):
1970 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1971 self.share_locks = _ShareAll()
1973 def CheckPrereq(self):
1974 """Check prerequisites.
1977 # Retrieve all information
1978 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1979 self.all_node_info = self.cfg.GetAllNodesInfo()
1980 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1982 def Exec(self, feedback_fn):
1983 """Verify integrity of cluster, performing various test on nodes.
1987 self._feedback_fn = feedback_fn
1989 feedback_fn("* Verifying cluster config")
1991 for msg in self.cfg.VerifyConfig():
1992 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1994 feedback_fn("* Verifying cluster certificate files")
1996 for cert_filename in constants.ALL_CERT_FILES:
1997 (errcode, msg) = _VerifyCertificate(cert_filename)
1998 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2000 feedback_fn("* Verifying hypervisor parameters")
2002 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2003 self.all_inst_info.values()))
2005 feedback_fn("* Verifying all nodes belong to an existing group")
2007 # We do this verification here because, should this bogus circumstance
2008 # occur, it would never be caught by VerifyGroup, which only acts on
2009 # nodes/instances reachable from existing node groups.
2011 dangling_nodes = set(node.name for node in self.all_node_info.values()
2012 if node.group not in self.all_group_info)
2014 dangling_instances = {}
2015 no_node_instances = []
2017 for inst in self.all_inst_info.values():
2018 if inst.primary_node in dangling_nodes:
2019 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2020 elif inst.primary_node not in self.all_node_info:
2021 no_node_instances.append(inst.name)
2026 utils.CommaJoin(dangling_instances.get(node.name,
2028 for node in dangling_nodes]
2030 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2032 "the following nodes (and their instances) belong to a non"
2033 " existing group: %s", utils.CommaJoin(pretty_dangling))
2035 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2037 "the following instances have a non-existing primary-node:"
2038 " %s", utils.CommaJoin(no_node_instances))
2043 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2044 """Verifies the status of a node group.
2047 HPATH = "cluster-verify"
2048 HTYPE = constants.HTYPE_CLUSTER
2051 _HOOKS_INDENT_RE = re.compile("^", re.M)
2053 class NodeImage(object):
2054 """A class representing the logical and physical status of a node.
2057 @ivar name: the node name to which this object refers
2058 @ivar volumes: a structure as returned from
2059 L{ganeti.backend.GetVolumeList} (runtime)
2060 @ivar instances: a list of running instances (runtime)
2061 @ivar pinst: list of configured primary instances (config)
2062 @ivar sinst: list of configured secondary instances (config)
2063 @ivar sbp: dictionary of {primary-node: list of instances} for all
2064 instances for which this node is secondary (config)
2065 @ivar mfree: free memory, as reported by hypervisor (runtime)
2066 @ivar dfree: free disk, as reported by the node (runtime)
2067 @ivar offline: the offline status (config)
2068 @type rpc_fail: boolean
2069 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2070 not whether the individual keys were correct) (runtime)
2071 @type lvm_fail: boolean
2072 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2073 @type hyp_fail: boolean
2074 @ivar hyp_fail: whether the RPC call didn't return the instance list
2075 @type ghost: boolean
2076 @ivar ghost: whether this node is unknown to the configuration (config)
2077 @type os_fail: boolean
2078 @ivar os_fail: whether the RPC call didn't return valid OS data
2080 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2081 @type vm_capable: boolean
2082 @ivar vm_capable: whether the node can host instances
2085 def __init__(self, offline=False, name=None, vm_capable=True):
2094 self.offline = offline
2095 self.vm_capable = vm_capable
2096 self.rpc_fail = False
2097 self.lvm_fail = False
2098 self.hyp_fail = False
2100 self.os_fail = False
2103 def ExpandNames(self):
2104 # This raises errors.OpPrereqError on its own:
2105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2107 # Get instances in node group; this is unsafe and needs verification later
2109 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2111 self.needed_locks = {
2112 locking.LEVEL_INSTANCE: inst_names,
2113 locking.LEVEL_NODEGROUP: [self.group_uuid],
2114 locking.LEVEL_NODE: [],
2117 self.share_locks = _ShareAll()
2119 def DeclareLocks(self, level):
2120 if level == locking.LEVEL_NODE:
2121 # Get members of node group; this is unsafe and needs verification later
2122 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2124 all_inst_info = self.cfg.GetAllInstancesInfo()
2126 # In Exec(), we warn about mirrored instances that have primary and
2127 # secondary living in separate node groups. To fully verify that
2128 # volumes for these instances are healthy, we will need to do an
2129 # extra call to their secondaries. We ensure here those nodes will be locked.
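# (Illustrative example: a DRBD instance whose primary node is in this
# group but whose secondary lives in another group gets that secondary
# node added to the node locks here, so its volumes can still be
# queried.)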
2131 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2132 # Important: access only the instances whose lock is owned
2133 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2134 nodes.update(all_inst_info[inst].secondary_nodes)
2136 self.needed_locks[locking.LEVEL_NODE] = nodes
2138 def CheckPrereq(self):
2139 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2140 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2142 group_nodes = set(self.group_info.members)
2144 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2147 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2149 unlocked_instances = \
2150 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2153 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2154 utils.CommaJoin(unlocked_nodes),
2157 if unlocked_instances:
2158 raise errors.OpPrereqError("Missing lock for instances: %s" %
2159 utils.CommaJoin(unlocked_instances),
2162 self.all_node_info = self.cfg.GetAllNodesInfo()
2163 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2165 self.my_node_names = utils.NiceSort(group_nodes)
2166 self.my_inst_names = utils.NiceSort(group_instances)
2168 self.my_node_info = dict((name, self.all_node_info[name])
2169 for name in self.my_node_names)
2171 self.my_inst_info = dict((name, self.all_inst_info[name])
2172 for name in self.my_inst_names)
2174 # We detect here the nodes that will need the extra RPC calls for verifying
2175 # split LV volumes; they should be locked.
2176 extra_lv_nodes = set()
2178 for inst in self.my_inst_info.values():
2179 if inst.disk_template in constants.DTS_INT_MIRROR:
2180 for nname in inst.all_nodes:
2181 if self.all_node_info[nname].group != self.group_uuid:
2182 extra_lv_nodes.add(nname)
2184 unlocked_lv_nodes = \
2185 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2187 if unlocked_lv_nodes:
2188 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2189 utils.CommaJoin(unlocked_lv_nodes),
2191 self.extra_lv_nodes = list(extra_lv_nodes)
2193 def _VerifyNode(self, ninfo, nresult):
2194 """Perform some basic validation on data returned from a node.
2196 - check the result data structure is well formed and has all the mandatory fields
2198 - check ganeti version
2200 @type ninfo: L{objects.Node}
2201 @param ninfo: the node to check
2202 @param nresult: the results from the node
2204 @return: whether overall this call was successful (and we can expect
2205 reasonable values in the response)
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 # main result, nresult should be a non-empty dict
2212 test = not nresult or not isinstance(nresult, dict)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "unable to verify node: no data returned")
2218 # compares ganeti version
2219 local_version = constants.PROTOCOL_VERSION
2220 remote_version = nresult.get("version", None)
2221 test = not (remote_version and
2222 isinstance(remote_version, (list, tuple)) and
2223 len(remote_version) == 2)
2224 _ErrorIf(test, constants.CV_ENODERPC, node,
2225 "connection to node returned invalid data")
2229 test = local_version != remote_version[0]
2230 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2231 "incompatible protocol versions: master %s,"
2232 " node %s", local_version, remote_version[0])
2236 # node seems compatible, we can actually try to look into its results
2238 # full package version
2239 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2240 constants.CV_ENODEVERSION, node,
2241 "software version mismatch: master %s, node %s",
2242 constants.RELEASE_VERSION, remote_version[1],
2243 code=self.ETYPE_WARNING)
2245 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2246 if ninfo.vm_capable and isinstance(hyp_result, dict):
2247 for hv_name, hv_result in hyp_result.iteritems():
2248 test = hv_result is not None
2249 _ErrorIf(test, constants.CV_ENODEHV, node,
2250 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2252 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2253 if ninfo.vm_capable and isinstance(hvp_result, list):
2254 for item, hv_name, hv_result in hvp_result:
2255 _ErrorIf(True, constants.CV_ENODEHV, node,
2256 "hypervisor %s parameter verify failure (source %s): %s",
2257 hv_name, item, hv_result)
2259 test = nresult.get(constants.NV_NODESETUP,
2260 ["Missing NODESETUP results"])
2261 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2266 def _VerifyNodeTime(self, ninfo, nresult,
2267 nvinfo_starttime, nvinfo_endtime):
2268 """Check the node time.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nvinfo_starttime: the start time of the RPC call
2274 @param nvinfo_endtime: the end time of the RPC call
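As an illustrative summary of the check below: the node's reported time
is merged via L{utils.MergeTime} and must fall within
C{constants.NODE_MAX_CLOCK_SKEW} seconds of the [start, end] window of
the verify RPC, otherwise a C{CV_ENODETIME} error is reported.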
2278 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2280 ntime = nresult.get(constants.NV_TIME, None)
2282 ntime_merged = utils.MergeTime(ntime)
2283 except (ValueError, TypeError):
2284 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2287 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2288 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2289 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2290 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2294 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2295 "Node time diverges by at least %s from master node time",
2298 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2299 """Check the node LVM results.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2304 @param vg_name: the configured VG name
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 # checks vg existence and size > 20G
2314 vglist = nresult.get(constants.NV_VGLIST, None)
2316 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2318 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2319 constants.MIN_VG_SIZE)
2320 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2323 pvlist = nresult.get(constants.NV_PVLIST, None)
2324 test = pvlist is None
2325 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2327 # check that ':' is not present in PV names, since it's a
2328 # special character for lvcreate (denotes the range of PEs to allocate on the PV)
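# (Illustrative: lvcreate accepts a trailing "PV:from-to" extent range on
# its command line, so a PV whose own name contains ':' could not be
# passed to it unambiguously.)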
2330 for _, pvname, owner_vg in pvlist:
2331 test = ":" in pvname
2332 _ErrorIf(test, constants.CV_ENODELVM, node,
2333 "Invalid character ':' in PV '%s' of VG '%s'",
2336 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2337 """Check the node bridges.
2339 @type ninfo: L{objects.Node}
2340 @param ninfo: the node to check
2341 @param nresult: the remote results for the node
2342 @param bridges: the expected list of bridges
2349 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2351 missing = nresult.get(constants.NV_BRIDGES, None)
2352 test = not isinstance(missing, list)
2353 _ErrorIf(test, constants.CV_ENODENET, node,
2354 "did not return valid bridge information")
2356 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2357 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2359 def _VerifyNodeUserScripts(self, ninfo, nresult):
2360 """Check the results of user scripts presence and executability on the node
2362 @type ninfo: L{objects.Node}
2363 @param ninfo: the node to check
2364 @param nresult: the remote results for the node
2369 test = not constants.NV_USERSCRIPTS in nresult
2370 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2371 "did not return user scripts information")
2373 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2375 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2376 "user scripts not present or not executable: %s" %
2377 utils.CommaJoin(sorted(broken_scripts)))
2379 def _VerifyNodeNetwork(self, ninfo, nresult):
2380 """Check the node network connectivity results.
2382 @type ninfo: L{objects.Node}
2383 @param ninfo: the node to check
2384 @param nresult: the remote results for the node
2388 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2390 test = constants.NV_NODELIST not in nresult
2391 _ErrorIf(test, constants.CV_ENODESSH, node,
2392 "node hasn't returned node ssh connectivity data")
2394 if nresult[constants.NV_NODELIST]:
2395 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2396 _ErrorIf(True, constants.CV_ENODESSH, node,
2397 "ssh communication with node '%s': %s", a_node, a_msg)
2399 test = constants.NV_NODENETTEST not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node tcp connectivity data")
2403 if nresult[constants.NV_NODENETTEST]:
2404 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2406 _ErrorIf(True, constants.CV_ENODENET, node,
2407 "tcp communication with node '%s': %s",
2408 anode, nresult[constants.NV_NODENETTEST][anode])
2410 test = constants.NV_MASTERIP not in nresult
2411 _ErrorIf(test, constants.CV_ENODENET, node,
2412 "node hasn't returned node master IP reachability data")
2414 if not nresult[constants.NV_MASTERIP]:
2415 if node == self.master_node:
2416 msg = "the master node cannot reach the master IP (not configured?)"
2418 msg = "cannot reach the master IP"
2419 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2421 def _VerifyInstance(self, instance, instanceconfig, node_image,
2423 """Verify an instance.
2425 This function checks to see if the required block devices are
2426 available on the instance's node.
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2430 node_current = instanceconfig.primary_node
2432 node_vol_should = {}
2433 instanceconfig.MapLVsByNode(node_vol_should)
2435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2436 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2437 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2439 for node in node_vol_should:
2440 n_img = node_image[node]
2441 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2442 # ignore missing volumes on offline or broken nodes
2444 for volume in node_vol_should[node]:
2445 test = volume not in n_img.volumes
2446 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2447 "volume %s missing on node %s", volume, node)
2449 if instanceconfig.admin_state == constants.ADMINST_UP:
2450 pri_img = node_image[node_current]
2451 test = instance not in pri_img.instances and not pri_img.offline
2452 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2453 "instance not running on its primary node %s",
2456 diskdata = [(nname, success, status, idx)
2457 for (nname, disks) in diskstatus.items()
2458 for idx, (success, status) in enumerate(disks)]
2460 for nname, success, bdev_status, idx in diskdata:
2461 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2463 snode = node_image[nname]
2464 bad_snode = snode.ghost or snode.offline
2465 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2466 not success and not bad_snode,
2467 constants.CV_EINSTANCEFAULTYDISK, instance,
2468 "couldn't retrieve status for disk/%s on %s: %s",
2469 idx, nname, bdev_status)
2470 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2471 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2472 constants.CV_EINSTANCEFAULTYDISK, instance,
2473 "disk/%s on %s is faulty", idx, nname)
2475 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2476 """Verify if there are any unknown volumes in the cluster.
2478 The .os, .swap and backup volumes are ignored. All other volumes are
2479 reported as unknown.
2481 @type reserved: L{ganeti.utils.FieldSet}
2482 @param reserved: a FieldSet of reserved volume names
2485 for node, n_img in node_image.items():
2486 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2487 self.all_node_info[node].group != self.group_uuid):
2488 # skip non-healthy nodes
2490 for volume in n_img.volumes:
2491 test = ((node not in node_vol_should or
2492 volume not in node_vol_should[node]) and
2493 not reserved.Matches(volume))
2494 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2495 "volume %s is unknown", volume)
2497 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2498 """Verify N+1 Memory Resilience.
2500 Check that if one single node dies we can still start all the
2501 instances it was primary for.
2504 cluster_info = self.cfg.GetClusterInfo()
2505 for node, n_img in node_image.items():
2506 # This code checks that every node which is now listed as
2507 # secondary has enough memory to host all instances it is
2508 # supposed to host, should a single other node in the cluster fail.
2509 # FIXME: not ready for failover to an arbitrary node
2510 # FIXME: does not support file-backed instances
2511 # WARNING: we currently take into account down instances as well
2512 # as up ones, considering that even if they're down someone
2513 # might want to start them even in the event of a node failure.
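# Illustrative example: if this node is secondary for two auto-balanced
# instances of primary node P with minimum memory 512 and 1024 MiB, it
# must report at least 1536 MiB free, otherwise CV_ENODEN1 is raised for
# it below.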
2514 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2515 # we're skipping nodes marked offline and nodes in other groups from
2516 # the N+1 warning, since most likely we don't have good memory
2517 # information from them; we already list instances living on such
2518 # nodes, and that's enough warning
2520 #TODO(dynmem): also consider ballooning out other instances
2521 for prinode, instances in n_img.sbp.items():
2523 for instance in instances:
2524 bep = cluster_info.FillBE(instance_cfg[instance])
2525 if bep[constants.BE_AUTO_BALANCE]:
2526 needed_mem += bep[constants.BE_MINMEM]
2527 test = n_img.mfree < needed_mem
2528 self._ErrorIf(test, constants.CV_ENODEN1, node,
2529 "not enough memory to accomodate instance failovers"
2530 " should node %s fail (%dMiB needed, %dMiB available)",
2531 prinode, needed_mem, n_img.mfree)
2534 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2535 (files_all, files_opt, files_mc, files_vm)):
2536 """Verifies file checksums collected from all nodes.
2538 @param errorif: Callback for reporting errors
2539 @param nodeinfo: List of L{objects.Node} objects
2540 @param master_node: Name of master node
2541 @param all_nvinfo: RPC results
2544 # Define functions determining which nodes to consider for a file
2547 (files_mc, lambda node: (node.master_candidate or
2548 node.name == master_node)),
2549 (files_vm, lambda node: node.vm_capable),
2552 # Build mapping from filename to list of nodes which should have the file
2554 for (files, fn) in files2nodefn:
2556 filenodes = nodeinfo
2558 filenodes = filter(fn, nodeinfo)
2559 nodefiles.update((filename,
2560 frozenset(map(operator.attrgetter("name"), filenodes)))
2561 for filename in files)
2563 assert set(nodefiles) == (files_all | files_mc | files_vm)
2565 fileinfo = dict((filename, {}) for filename in nodefiles)
2566 ignore_nodes = set()
2568 for node in nodeinfo:
2570 ignore_nodes.add(node.name)
2573 nresult = all_nvinfo[node.name]
2575 if nresult.fail_msg or not nresult.payload:
2578 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2580 test = not (node_files and isinstance(node_files, dict))
2581 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2582 "Node did not return file checksum data")
2584 ignore_nodes.add(node.name)
2587 # Build per-checksum mapping from filename to nodes having it
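# (Illustrative shape: fileinfo[filename] = {checksum: set(node names)};
# a consistent file ends up with exactly one checksum key.)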
2588 for (filename, checksum) in node_files.items():
2589 assert filename in nodefiles
2590 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2592 for (filename, checksums) in fileinfo.items():
2593 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2595 # Nodes having the file
2596 with_file = frozenset(node_name
2597 for nodes in fileinfo[filename].values()
2598 for node_name in nodes) - ignore_nodes
2600 expected_nodes = nodefiles[filename] - ignore_nodes
2602 # Nodes missing file
2603 missing_file = expected_nodes - with_file
2605 if filename in files_opt:
2607 errorif(missing_file and missing_file != expected_nodes,
2608 constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s is optional, but it must exist on all or no"
2610 " nodes (not found on %s)",
2611 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2613 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s is missing from node(s) %s", filename,
2615 utils.CommaJoin(utils.NiceSort(missing_file)))
2617 # Warn if a node has a file it shouldn't
2618 unexpected = with_file - expected_nodes
2620 constants.CV_ECLUSTERFILECHECK, None,
2621 "File %s should not exist on node(s) %s",
2622 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2624 # See if there are multiple versions of the file
2625 test = len(checksums) > 1
2627 variants = ["variant %s on %s" %
2628 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2629 for (idx, (checksum, nodes)) in
2630 enumerate(sorted(checksums.items()))]
2634 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2635 "File %s found with %s different checksums (%s)",
2636 filename, len(checksums), "; ".join(variants))
2638 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2640 """Verifies and the node DRBD status.
2642 @type ninfo: L{objects.Node}
2643 @param ninfo: the node to check
2644 @param nresult: the remote results for the node
2645 @param instanceinfo: the dict of instances
2646 @param drbd_helper: the configured DRBD usermode helper
2647 @param drbd_map: the DRBD map as returned by
2648 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
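The map is assumed here to be, roughly, a dict of node name to
{minor: instance name}, which is how the loop below consumes it.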
2652 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2655 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2656 test = (helper_result == None)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "no drbd usermode helper returned")
2660 status, payload = helper_result
2662 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2663 "drbd usermode helper check unsuccessful: %s", payload)
2664 test = status and (payload != drbd_helper)
2665 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2666 "wrong drbd usermode helper: %s", payload)
2668 # compute the DRBD minors
2670 for minor, instance in drbd_map[node].items():
2671 test = instance not in instanceinfo
2672 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2673 "ghost instance '%s' in temporary DRBD map", instance)
2674 # ghost instance should not be running, but otherwise we
2675 # don't give double warnings (both ghost instance and
2676 # unallocated minor in use)
2678 node_drbd[minor] = (instance, False)
2680 instance = instanceinfo[instance]
2681 node_drbd[minor] = (instance.name,
2682 instance.admin_state == constants.ADMINST_UP)
2684 # and now check them
2685 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2686 test = not isinstance(used_minors, (tuple, list))
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "cannot parse drbd status file: %s", str(used_minors))
2690 # we cannot check drbd status
2693 for minor, (iname, must_exist) in node_drbd.items():
2694 test = minor not in used_minors and must_exist
2695 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2696 "drbd minor %d of instance %s is not active", minor, iname)
2697 for minor in used_minors:
2698 test = minor not in node_drbd
2699 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2700 "unallocated drbd minor %d is in use", minor)
2702 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2703 """Builds the node OS structures.
2705 @type ninfo: L{objects.Node}
2706 @param ninfo: the node to check
2707 @param nresult: the remote results for the node
2708 @param nimg: the node image object
2712 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2714 remote_os = nresult.get(constants.NV_OSLIST, None)
2715 test = (not isinstance(remote_os, list) or
2716 not compat.all(isinstance(v, list) and len(v) == 7
2717 for v in remote_os))
2719 _ErrorIf(test, constants.CV_ENODEOS, node,
2720 "node hasn't returned valid OS data")
2729 for (name, os_path, status, diagnose,
2730 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2732 if name not in os_dict:
2735 # parameters is a list of lists instead of list of tuples due to
2736 # JSON lacking a real tuple type, fix it:
2737 parameters = [tuple(v) for v in parameters]
2738 os_dict[name].append((os_path, status, diagnose,
2739 set(variants), set(parameters), set(api_ver)))
2741 nimg.oslist = os_dict
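# Illustrative shape of the result (based on the loop above):
#   nimg.oslist = {os_name: [(path, status, diagnose, set(variants),
#                             set(parameters), set(api_versions)), ...]}
# A well-behaved node reports exactly one entry per OS name.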
2743 def _VerifyNodeOS(self, ninfo, nimg, base):
2744 """Verifies the node OS list.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nimg: the node image object
2749 @param base: the 'template' node we match against (e.g. from the master)
2753 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2755 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2757 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2758 for os_name, os_data in nimg.oslist.items():
2759 assert os_data, "Empty OS status for OS %s?!" % os_name
2760 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2761 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2762 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2763 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2764 "OS '%s' has multiple entries (first one shadows the rest): %s",
2765 os_name, utils.CommaJoin([v[0] for v in os_data]))
2766 # comparisons with the 'base' image
2767 test = os_name not in base.oslist
2768 _ErrorIf(test, constants.CV_ENODEOS, node,
2769 "Extra OS %s not present on reference node (%s)",
2773 assert base.oslist[os_name], "Base node has empty OS status?"
2774 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2776 # base OS is invalid, skipping
2778 for kind, a, b in [("API version", f_api, b_api),
2779 ("variants list", f_var, b_var),
2780 ("parameters", beautify_params(f_param),
2781 beautify_params(b_param))]:
2782 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2783 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2784 kind, os_name, base.name,
2785 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2787 # check any missing OSes
2788 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2789 _ErrorIf(missing, constants.CV_ENODEOS, node,
2790 "OSes present on reference node %s but missing on this node: %s",
2791 base.name, utils.CommaJoin(missing))
2793 def _VerifyOob(self, ninfo, nresult):
2794 """Verifies out of band functionality of a node.
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2802 # We just have to verify the paths on master and/or master candidates
2803 # as the oob helper is invoked on the master
2804 if ((ninfo.master_candidate or ninfo.master_capable) and
2805 constants.NV_OOB_PATHS in nresult):
2806 for path_result in nresult[constants.NV_OOB_PATHS]:
2807 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2809 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2810 """Verifies and updates the node volume data.
2812 This function will update a L{NodeImage}'s internal structures
2813 with data from the remote call.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @param nimg: the node image object
2819 @param vg_name: the configured VG name
2823 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2825 nimg.lvm_fail = True
2826 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2829 elif isinstance(lvdata, basestring):
2830 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2831 utils.SafeEncode(lvdata))
2832 elif not isinstance(lvdata, dict):
2833 _ErrorIf(True, constants.CV_ENODELVM, node,
2834 "rpc call to node failed (lvlist)")
2836 nimg.volumes = lvdata
2837 nimg.lvm_fail = False
2839 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2840 """Verifies and updates the node instance list.
2842 If the listing was successful, then updates this node's instance
2843 list. Otherwise, it marks the RPC call as failed for the instance list.
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2849 @param nimg: the node image object
2852 idata = nresult.get(constants.NV_INSTANCELIST, None)
2853 test = not isinstance(idata, list)
2854 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2855 "rpc call to node failed (instancelist): %s",
2856 utils.SafeEncode(str(idata)))
2858 nimg.hyp_fail = True
2860 nimg.instances = idata
2862 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2863 """Verifies and computes a node information map
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 # try to read free memory (from the hypervisor)
2876 hv_info = nresult.get(constants.NV_HVINFO, None)
2877 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2878 _ErrorIf(test, constants.CV_ENODEHV, node,
2879 "rpc call to node failed (hvinfo)")
2882 nimg.mfree = int(hv_info["memory_free"])
2883 except (ValueError, TypeError):
2884 _ErrorIf(True, constants.CV_ENODERPC, node,
2885 "node returned invalid nodeinfo, check hypervisor")
2887 # FIXME: devise a free space model for file based instances as well
2888 if vg_name is not None:
2889 test = (constants.NV_VGLIST not in nresult or
2890 vg_name not in nresult[constants.NV_VGLIST])
2891 _ErrorIf(test, constants.CV_ENODELVM, node,
2892 "node didn't return data for the volume group '%s'"
2893 " - it is either missing or broken", vg_name)
2896 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2897 except (ValueError, TypeError):
2898 _ErrorIf(True, constants.CV_ENODERPC, node,
2899 "node returned invalid LVM info, check LVM status")
2901 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2902 """Gets per-disk status information for all instances.
2904 @type nodelist: list of strings
2905 @param nodelist: Node names
2906 @type node_image: dict of (name, L{objects.Node})
2907 @param node_image: Node objects
2908 @type instanceinfo: dict of (name, L{objects.Instance})
2909 @param instanceinfo: Instance objects
2910 @rtype: {instance: {node: [(success, payload)]}}
2911 @return: a dictionary of per-instance dictionaries with nodes as
2912 keys and disk information as values; the disk information is a
2913 list of tuples (success, payload)
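An illustrative (made-up) result:
{"inst1": {"node1": [(True, <status>), (False, "some error")]}}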
2916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2919 node_disks_devonly = {}
2920 diskless_instances = set()
2921 diskless = constants.DT_DISKLESS
2923 for nname in nodelist:
2924 node_instances = list(itertools.chain(node_image[nname].pinst,
2925 node_image[nname].sinst))
2926 diskless_instances.update(inst for inst in node_instances
2927 if instanceinfo[inst].disk_template == diskless)
2928 disks = [(inst, disk)
2929 for inst in node_instances
2930 for disk in instanceinfo[inst].disks]
2933 # No need to collect data
2936 node_disks[nname] = disks
2938 # _AnnotateDiskParams already makes copies of the disks
2940 for (inst, dev) in disks:
2941 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2942 self.cfg.SetDiskID(anno_disk, nname)
2943 devonly.append(anno_disk)
2945 node_disks_devonly[nname] = devonly
2947 assert len(node_disks) == len(node_disks_devonly)
2949 # Collect data from all nodes with disks
2950 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2953 assert len(result) == len(node_disks)
2957 for (nname, nres) in result.items():
2958 disks = node_disks[nname]
2961 # No data from this node
2962 data = len(disks) * [(False, "node offline")]
2965 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2966 "while getting disk information: %s", msg)
2968 # No data from this node
2969 data = len(disks) * [(False, msg)]
2972 for idx, i in enumerate(nres.payload):
2973 if isinstance(i, (tuple, list)) and len(i) == 2:
2976 logging.warning("Invalid result from node %s, entry %d: %s",
2978 data.append((False, "Invalid result from the remote node"))
2980 for ((inst, _), status) in zip(disks, data):
2981 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2983 # Add empty entries for diskless instances.
2984 for inst in diskless_instances:
2985 assert inst not in instdisk
2988 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2989 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2990 compat.all(isinstance(s, (tuple, list)) and
2991 len(s) == 2 for s in statuses)
2992 for inst, nnames in instdisk.items()
2993 for nname, statuses in nnames.items())
2994 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2999 def _SshNodeSelector(group_uuid, all_nodes):
3000 """Create endless iterators for all potential SSH check hosts.
3003 nodes = [node for node in all_nodes
3004 if (node.group != group_uuid and
3006 keyfunc = operator.attrgetter("group")
3008 return map(itertools.cycle,
3009 [sorted(map(operator.attrgetter("name"), names))
3010 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3014 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3015 """Choose which nodes should talk to which other nodes.
3017 We will make nodes contact all nodes in their group, and one node from every other group.
3020 @warning: This algorithm has a known issue if one node group is much
3021 smaller than others (e.g. just one node). In such a case all other
3022 nodes will talk to the single node.
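Illustrative example: when verifying a group whose online nodes are
[n1, n2] while another group holds [m1, m2], both n1 and n2 are told to
check all of n1 and n2, and additionally n1 might be assigned m1 and n2
assigned m2, the foreign nodes being handed out round-robin per group by
_SshNodeSelector.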
3025 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3026 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3028 return (online_nodes,
3029 dict((name, sorted([i.next() for i in sel]))
3030 for name in online_nodes))
3032 def BuildHooksEnv(self):
3035 Cluster-Verify hooks are run only in the post phase; if they fail, their
3036 output is logged in the verify output and the verification fails.
3040 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3043 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3044 for node in self.my_node_info.values())
3048 def BuildHooksNodes(self):
3049 """Build hooks nodes.
3052 return ([], self.my_node_names)
3054 def Exec(self, feedback_fn):
3055 """Verify integrity of the node group, performing various test on nodes.
3058 # This method has too many local variables. pylint: disable=R0914
3059 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3061 if not self.my_node_names:
3063 feedback_fn("* Empty node group, skipping verification")
3067 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3068 verbose = self.op.verbose
3069 self._feedback_fn = feedback_fn
3071 vg_name = self.cfg.GetVGName()
3072 drbd_helper = self.cfg.GetDRBDHelper()
3073 cluster = self.cfg.GetClusterInfo()
3074 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3075 hypervisors = cluster.enabled_hypervisors
3076 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3078 i_non_redundant = [] # Non redundant instances
3079 i_non_a_balanced = [] # Non auto-balanced instances
3080 i_offline = 0 # Count of offline instances
3081 n_offline = 0 # Count of offline nodes
3082 n_drained = 0 # Count of nodes being drained
3083 node_vol_should = {}
3085 # FIXME: verify OS list
3088 filemap = _ComputeAncillaryFiles(cluster, False)
3090 # do local checksums
3091 master_node = self.master_node = self.cfg.GetMasterNode()
3092 master_ip = self.cfg.GetMasterIP()
3094 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3097 if self.cfg.GetUseExternalMipScript():
3098 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3100 node_verify_param = {
3101 constants.NV_FILELIST:
3102 utils.UniqueSequence(filename
3103 for files in filemap
3104 for filename in files),
3105 constants.NV_NODELIST:
3106 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3107 self.all_node_info.values()),
3108 constants.NV_HYPERVISOR: hypervisors,
3109 constants.NV_HVPARAMS:
3110 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3111 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3112 for node in node_data_list
3113 if not node.offline],
3114 constants.NV_INSTANCELIST: hypervisors,
3115 constants.NV_VERSION: None,
3116 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3117 constants.NV_NODESETUP: None,
3118 constants.NV_TIME: None,
3119 constants.NV_MASTERIP: (master_node, master_ip),
3120 constants.NV_OSLIST: None,
3121 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3122 constants.NV_USERSCRIPTS: user_scripts,
3125 if vg_name is not None:
3126 node_verify_param[constants.NV_VGLIST] = None
3127 node_verify_param[constants.NV_LVLIST] = vg_name
3128 node_verify_param[constants.NV_PVLIST] = [vg_name]
3131 node_verify_param[constants.NV_DRBDLIST] = None
3132 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3135 # FIXME: this needs to be changed per node-group, not cluster-wide
3137 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3138 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(default_nicpp[constants.NIC_LINK])
3140 for instance in self.my_inst_info.values():
3141 for nic in instance.nics:
3142 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3143 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3144 bridges.add(full_nic[constants.NIC_LINK])
3147 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3149 # Build our expected cluster state
3150 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3152 vm_capable=node.vm_capable))
3153 for node in node_data_list)
3157 for node in self.all_node_info.values():
3158 path = _SupportsOob(self.cfg, node)
3159 if path and path not in oob_paths:
3160 oob_paths.append(path)
3163 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3165 for instance in self.my_inst_names:
3166 inst_config = self.my_inst_info[instance]
3167 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3170 for nname in inst_config.all_nodes:
3171 if nname not in node_image:
3172 gnode = self.NodeImage(name=nname)
3173 gnode.ghost = (nname not in self.all_node_info)
3174 node_image[nname] = gnode
3176 inst_config.MapLVsByNode(node_vol_should)
3178 pnode = inst_config.primary_node
3179 node_image[pnode].pinst.append(instance)
3181 for snode in inst_config.secondary_nodes:
3182 nimg = node_image[snode]
3183 nimg.sinst.append(instance)
3184 if pnode not in nimg.sbp:
3185 nimg.sbp[pnode] = []
3186 nimg.sbp[pnode].append(instance)
3188 # At this point, we have the in-memory data structures complete,
3189 # except for the runtime information, which we'll gather next
3191 # Due to the way our RPC system works, exact response times cannot be
3192 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3193 # time before and after executing the request, we can at least have a time window.
3195 nvinfo_starttime = time.time()
3196 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3198 self.cfg.GetClusterName())
3199 nvinfo_endtime = time.time()
3201 if self.extra_lv_nodes and vg_name is not None:
3203 self.rpc.call_node_verify(self.extra_lv_nodes,
3204 {constants.NV_LVLIST: vg_name},
3205 self.cfg.GetClusterName())
3207 extra_lv_nvinfo = {}
3209 all_drbd_map = self.cfg.ComputeDRBDMap()
3211 feedback_fn("* Gathering disk information (%s nodes)" %
3212 len(self.my_node_names))
3213 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3216 feedback_fn("* Verifying configuration file consistency")
3218 # If not all nodes are being checked, we need to make sure the master node
3219 # and a non-checked vm_capable node are in the list.
3220 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3222 vf_nvinfo = all_nvinfo.copy()
3223 vf_node_info = list(self.my_node_info.values())
3224 additional_nodes = []
3225 if master_node not in self.my_node_info:
3226 additional_nodes.append(master_node)
3227 vf_node_info.append(self.all_node_info[master_node])
3228 # Add the first vm_capable node we find which is not included,
3229 # excluding the master node (which we already have)
3230 for node in absent_nodes:
3231 nodeinfo = self.all_node_info[node]
3232 if (nodeinfo.vm_capable and not nodeinfo.offline and
3233 node != master_node):
3234 additional_nodes.append(node)
3235 vf_node_info.append(self.all_node_info[node])
3237 key = constants.NV_FILELIST
3238 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3239 {key: node_verify_param[key]},
3240 self.cfg.GetClusterName()))
3242 vf_nvinfo = all_nvinfo
3243 vf_node_info = self.my_node_info.values()
3245 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3247 feedback_fn("* Verifying node status")
3251 for node_i in node_data_list:
3253 nimg = node_image[node]
3257 feedback_fn("* Skipping offline node %s" % (node,))
3261 if node == master_node:
3263 elif node_i.master_candidate:
3264 ntype = "master candidate"
3265 elif node_i.drained:
3271 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3273 msg = all_nvinfo[node].fail_msg
3274 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3277 nimg.rpc_fail = True
3280 nresult = all_nvinfo[node].payload
3282 nimg.call_ok = self._VerifyNode(node_i, nresult)
3283 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3284 self._VerifyNodeNetwork(node_i, nresult)
3285 self._VerifyNodeUserScripts(node_i, nresult)
3286 self._VerifyOob(node_i, nresult)
3289 self._VerifyNodeLVM(node_i, nresult, vg_name)
3290 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3293 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3294 self._UpdateNodeInstances(node_i, nresult, nimg)
3295 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3296 self._UpdateNodeOS(node_i, nresult, nimg)
3298 if not nimg.os_fail:
3299 if refos_img is None:
3301 self._VerifyNodeOS(node_i, nimg, refos_img)
3302 self._VerifyNodeBridges(node_i, nresult, bridges)
3304 # Check whether all running instances are primary for the node. (This
3305 # can no longer be done from _VerifyInstance below, since some of the
3306 # wrong instances could be from other node groups.)
3307 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3309 for inst in non_primary_inst:
3310 test = inst in self.all_inst_info
3311 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3312 "instance should not run on node %s", node_i.name)
3313 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3314 "node is running unknown instance %s", inst)
3316 for node, result in extra_lv_nvinfo.items():
3317 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3318 node_image[node], vg_name)
3320 feedback_fn("* Verifying instance status")
3321 for instance in self.my_inst_names:
3323 feedback_fn("* Verifying instance %s" % instance)
3324 inst_config = self.my_inst_info[instance]
3325 self._VerifyInstance(instance, inst_config, node_image,
3327 inst_nodes_offline = []
3329 pnode = inst_config.primary_node
3330 pnode_img = node_image[pnode]
3331 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3332 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3333 " primary node failed", instance)
3335 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3337 constants.CV_EINSTANCEBADNODE, instance,
3338 "instance is marked as running and lives on offline node %s",
3339 inst_config.primary_node)
3341 # If the instance is non-redundant we cannot survive losing its primary
3342 # node, so we are not N+1 compliant. On the other hand we have no disk
3343 # templates with more than one secondary so that situation is not well supported either.
3345 # FIXME: does not support file-backed instances
3346 if not inst_config.secondary_nodes:
3347 i_non_redundant.append(instance)
3349 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3350 constants.CV_EINSTANCELAYOUT,
3351 instance, "instance has multiple secondary nodes: %s",
3352 utils.CommaJoin(inst_config.secondary_nodes),
3353 code=self.ETYPE_WARNING)
3355 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3356 pnode = inst_config.primary_node
3357 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3358 instance_groups = {}
3360 for node in instance_nodes:
3361 instance_groups.setdefault(self.all_node_info[node].group,
3365 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3366 # Sort so that we always list the primary node first.
3367 for group, nodes in sorted(instance_groups.items(),
3368 key=lambda (_, nodes): pnode in nodes,
3371 self._ErrorIf(len(instance_groups) > 1,
3372 constants.CV_EINSTANCESPLITGROUPS,
3373 instance, "instance has primary and secondary nodes in"
3374 " different groups: %s", utils.CommaJoin(pretty_list),
3375 code=self.ETYPE_WARNING)
3377 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3378 i_non_a_balanced.append(instance)
3380 for snode in inst_config.secondary_nodes:
3381 s_img = node_image[snode]
3382 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3383 snode, "instance %s, connection to secondary node failed",
3387 inst_nodes_offline.append(snode)
3389 # warn that the instance lives on offline nodes
3390 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3391 "instance has offline secondary node(s) %s",
3392 utils.CommaJoin(inst_nodes_offline))
3393 # ... or ghost/non-vm_capable nodes
3394 for node in inst_config.all_nodes:
3395 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3396 instance, "instance lives on ghost node %s", node)
3397 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3398 instance, "instance lives on non-vm_capable node %s", node)
3400 feedback_fn("* Verifying orphan volumes")
3401 reserved = utils.FieldSet(*cluster.reserved_lvs)
3403 # We will get spurious "unknown volume" warnings if any node of this group
3404 # is secondary for an instance whose primary is in another group. To avoid
3405 # them, we find these instances and add their volumes to node_vol_should.
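# (Illustrative example: an instance whose primary node sits in another
# group but whose DRBD secondary is one of our nodes leaves LVs on that
# node; without this step those LVs would be reported as orphans.)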
3406 for inst in self.all_inst_info.values():
3407 for secondary in inst.secondary_nodes:
3408 if (secondary in self.my_node_info
3409 and inst.name not in self.my_inst_info):
3410 inst.MapLVsByNode(node_vol_should)
3413 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3415 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3416 feedback_fn("* Verifying N+1 Memory redundancy")
3417 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3419 feedback_fn("* Other Notes")
3421 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3422 % len(i_non_redundant))
3424 if i_non_a_balanced:
3425 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3426 % len(i_non_a_balanced))
3429 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3432 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3435 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3439 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3440 """Analyze the post-hooks' result
3442 This method analyses the hook result, handles it, and sends some
3443 nicely-formatted feedback back to the user.
3445 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3446 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3447 @param hooks_results: the results of the multi-node hooks rpc call
3448 @param feedback_fn: function used to send feedback back to the caller
3449 @param lu_result: previous Exec result
3450 @return: the new Exec result, based on the previous result
3454 # We only really run POST phase hooks, only for non-empty groups,
3455 # and are only interested in their results
3456 if not self.my_node_names:
3459 elif phase == constants.HOOKS_PHASE_POST:
3460 # Used to change hooks' output to proper indentation
3461 feedback_fn("* Hooks Results")
3462 assert hooks_results, "invalid result from hooks"
3464 for node_name in hooks_results:
3465 res = hooks_results[node_name]
3467 test = msg and not res.offline
3468 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3469 "Communication failure in hooks execution: %s", msg)
3470 if res.offline or msg:
3471 # No need to investigate payload if node is offline or gave an error.
3474 for script, hkr, output in res.payload:
3475 test = hkr == constants.HKR_FAIL
3476 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3477 "Script %s failed, output:", script)
3479 output = self._HOOKS_INDENT_RE.sub(" ", output)
3480 feedback_fn("%s" % output)
3486 class LUClusterVerifyDisks(NoHooksLU):
3487 """Verifies the cluster disks status.
3492 def ExpandNames(self):
3493 self.share_locks = _ShareAll()
3494 self.needed_locks = {
3495 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3498 def Exec(self, feedback_fn):
3499 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3501 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3502 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3503 for group in group_names])
3506 class LUGroupVerifyDisks(NoHooksLU):
3507 """Verifies the status of all disks in a node group.
3512 def ExpandNames(self):
3513 # Raises errors.OpPrereqError on its own if group can't be found
3514 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3516 self.share_locks = _ShareAll()
3517 self.needed_locks = {
3518 locking.LEVEL_INSTANCE: [],
3519 locking.LEVEL_NODEGROUP: [],
3520 locking.LEVEL_NODE: [],
3523 def DeclareLocks(self, level):
3524 if level == locking.LEVEL_INSTANCE:
3525 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3527 # Lock instances optimistically, needs verification once node and group
3528 # locks have been acquired
3529 self.needed_locks[locking.LEVEL_INSTANCE] = \
3530 self.cfg.GetNodeGroupInstances(self.group_uuid)
3532 elif level == locking.LEVEL_NODEGROUP:
3533 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3535 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3536 set([self.group_uuid] +
3537 # Lock all groups used by instances optimistically; this requires
3538 # going via the node before it's locked, requiring verification later on
3541 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3542 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3544 elif level == locking.LEVEL_NODE:
3545 # This will only lock the nodes in the group to be verified which contain actual instances
3547 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3548 self._LockInstancesNodes()
3550 # Lock all nodes in group to be verified
3551 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3552 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3553 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3555 def CheckPrereq(self):
3556 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3557 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3558 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3560 assert self.group_uuid in owned_groups
3562 # Check if locked instances are still correct
3563 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3565 # Get instance information
3566 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3568 # Check if node groups for locked instances are still correct
3569 _CheckInstancesNodeGroups(self.cfg, self.instances,
3570 owned_groups, owned_nodes, self.group_uuid)
3572 def Exec(self, feedback_fn):
3573 """Verify integrity of cluster disks.
3575 @rtype: tuple of three items
3576 @return: a tuple of (dict of node-to-node_error, list of instances
3577 which need activate-disks, dict of instance: (node, volume) for missing volumes)
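An illustrative (entirely made-up) return value:
({"node3": "rpc error"}, ["instance1"],
{"instance2": [["node1", "xenvg/lv-disk0"]]})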
3582 res_instances = set()
3585 nv_dict = _MapInstanceDisksToNodes([inst
3586 for inst in self.instances.values()
3587 if inst.admin_state == constants.ADMINST_UP])
3590 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3591 set(self.cfg.GetVmCapableNodeList()))
3593 node_lvs = self.rpc.call_lv_list(nodes, [])
3595 for (node, node_res) in node_lvs.items():
3596 if node_res.offline:
3599 msg = node_res.fail_msg
3601 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3602 res_nodes[node] = msg
3605 for lv_name, (_, _, lv_online) in node_res.payload.items():
3606 inst = nv_dict.pop((node, lv_name), None)
3607 if not (lv_online or inst is None):
3608 res_instances.add(inst)
3610 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3612 for key, inst in nv_dict.iteritems():
3613 res_missing.setdefault(inst, []).append(list(key))
3615 return (res_nodes, list(res_instances), res_missing)
3618 class LUClusterRepairDiskSizes(NoHooksLU):
3619 """Verifies the cluster disks sizes.
3624 def ExpandNames(self):
3625 if self.op.instances:
3626 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3627 self.needed_locks = {
3628 locking.LEVEL_NODE_RES: [],
3629 locking.LEVEL_INSTANCE: self.wanted_names,
3631 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3633 self.wanted_names = None
3634 self.needed_locks = {
3635 locking.LEVEL_NODE_RES: locking.ALL_SET,
3636 locking.LEVEL_INSTANCE: locking.ALL_SET,
3638 self.share_locks = {
3639 locking.LEVEL_NODE_RES: 1,
3640 locking.LEVEL_INSTANCE: 0,
3643 def DeclareLocks(self, level):
3644 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3645 self._LockInstancesNodes(primary_only=True, level=level)
3647 def CheckPrereq(self):
3648 """Check prerequisites.
3650 This only checks the optional instance list against the existing names.
3653 if self.wanted_names is None:
3654 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3656 self.wanted_instances = \
3657 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3659 def _EnsureChildSizes(self, disk):
3660 """Ensure children of the disk have the needed disk size.
3662 This is valid mainly for DRBD8 and fixes an issue where the
3663 children have smaller disk size.
3665 @param disk: an L{ganeti.objects.Disk} object
3668 if disk.dev_type == constants.LD_DRBD8:
3669 assert disk.children, "Empty children for DRBD8?"
3670 fchild = disk.children[0]
3671 mismatch = fchild.size < disk.size
3673 self.LogInfo("Child disk has size %d, parent %d, fixing",
3674 fchild.size, disk.size)
3675 fchild.size = disk.size
3677 # and we recurse on this child only, not on the metadev
3678 return self._EnsureChildSizes(fchild) or mismatch
3682 def Exec(self, feedback_fn):
3683 """Verify the size of cluster disks.
3686 # TODO: check child disks too
3687 # TODO: check differences in size between primary/secondary nodes
3689 for instance in self.wanted_instances:
3690 pnode = instance.primary_node
3691 if pnode not in per_node_disks:
3692 per_node_disks[pnode] = []
3693 for idx, disk in enumerate(instance.disks):
3694 per_node_disks[pnode].append((instance, idx, disk))
3696 assert not (frozenset(per_node_disks.keys()) -
3697 self.owned_locks(locking.LEVEL_NODE_RES)), \
3698 "Not owning correct locks"
3699 assert not self.owned_locks(locking.LEVEL_NODE)
3702 for node, dskl in per_node_disks.items():
3703 newl = [v[2].Copy() for v in dskl]
3705 self.cfg.SetDiskID(dsk, node)
3706 result = self.rpc.call_blockdev_getsize(node, newl)
3708 self.LogWarning("Failure in blockdev_getsize call to node"
3709 " %s, ignoring", node)
3711 if len(result.payload) != len(dskl):
3712 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3713 " result.payload=%s", node, len(dskl), result.payload)
3714 self.LogWarning("Invalid result from node %s, ignoring node results",
3717 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3718 if size is None:
3719 self.LogWarning("Disk %d of instance %s did not return size"
3720 " information, ignoring", idx, instance.name)
3722 if not isinstance(size, (int, long)):
3723 self.LogWarning("Disk %d of instance %s did not return valid"
3724 " size information, ignoring", idx, instance.name)
3727 if size != disk.size:
3728 self.LogInfo("Disk %d of instance %s has mismatched size,"
3729 " correcting: recorded %d, actual %d", idx,
3730 instance.name, disk.size, size)
3731 disk.size = size
3732 self.cfg.Update(instance, feedback_fn)
3733 changed.append((instance.name, idx, size))
3734 if self._EnsureChildSizes(disk):
3735 self.cfg.Update(instance, feedback_fn)
3736 changed.append((instance.name, idx, disk.size))
3738 return changed
3740 class LUClusterRename(LogicalUnit):
3741 """Rename the cluster.
3744 HPATH = "cluster-rename"
3745 HTYPE = constants.HTYPE_CLUSTER
3747 def BuildHooksEnv(self):
3752 "OP_TARGET": self.cfg.GetClusterName(),
3753 "NEW_NAME": self.op.name,
3756 def BuildHooksNodes(self):
3757 """Build hooks nodes.
3760 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3762 def CheckPrereq(self):
3763 """Verify that the passed name is a valid one.
3766 hostname = netutils.GetHostname(name=self.op.name,
3767 family=self.cfg.GetPrimaryIPFamily())
3769 new_name = hostname.name
3770 self.ip = new_ip = hostname.ip
3771 old_name = self.cfg.GetClusterName()
3772 old_ip = self.cfg.GetMasterIP()
3773 if new_name == old_name and new_ip == old_ip:
3774 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3775 " cluster has changed",
3777 if new_ip != old_ip:
3778 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3779 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3780 " reachable on the network" %
3781 new_ip, errors.ECODE_NOTUNIQUE)
3783 self.op.name = new_name
3785 def Exec(self, feedback_fn):
3786 """Rename the cluster.
3789 clustername = self.op.name
3792 # shut down the master IP
3793 master_params = self.cfg.GetMasterNetworkParameters()
3794 ems = self.cfg.GetUseExternalMipScript()
3795 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3797 result.Raise("Could not disable the master role")
3800 cluster = self.cfg.GetClusterInfo()
3801 cluster.cluster_name = clustername
3802 cluster.master_ip = new_ip
3803 self.cfg.Update(cluster, feedback_fn)
3805 # update the known hosts file
3806 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3807 node_list = self.cfg.GetOnlineNodeList()
3808 try:
3809 node_list.remove(master_params.name)
3810 except ValueError:
3811 pass
3812 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3814 master_params.ip = new_ip
3815 result = self.rpc.call_node_activate_master_ip(master_params.name,
3817 msg = result.fail_msg
3818 if msg:
3819 self.LogWarning("Could not re-enable the master role on"
3820 " the master, please restart manually: %s", msg)
3825 def _ValidateNetmask(cfg, netmask):
3826 """Checks if a netmask is valid.
3828 @type cfg: L{config.ConfigWriter}
3829 @param cfg: The cluster configuration
3831 @param netmask: the netmask to be verified
3832 @raise errors.OpPrereqError: if the validation fails
3835 ip_family = cfg.GetPrimaryIPFamily()
3836 try:
3837 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3838 except errors.ProgrammerError:
3839 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3841 if not ipcls.ValidateNetmask(netmask):
3842 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3843 (netmask), errors.ECODE_INVAL)
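# Illustrative sketch (added for clarity, not part of the original module):
# the netmask argument is the integer prefix length carried by the opcode,
# so typical calls would look like
#
#   _ValidateNetmask(self.cfg, 24)   # usually fine on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 64)   # usually fine on an IPv6 cluster
#
# with the exact accepted range decided by the ipcls.ValidateNetmask check
# above for the cluster's primary IP family.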
3846 class LUClusterSetParams(LogicalUnit):
3847 """Change the parameters of the cluster.
3850 HPATH = "cluster-modify"
3851 HTYPE = constants.HTYPE_CLUSTER
3854 def CheckArguments(self):
3858 if self.op.uid_pool:
3859 uidpool.CheckUidPool(self.op.uid_pool)
3861 if self.op.add_uids:
3862 uidpool.CheckUidPool(self.op.add_uids)
3864 if self.op.remove_uids:
3865 uidpool.CheckUidPool(self.op.remove_uids)
3867 if self.op.master_netmask is not None:
3868 _ValidateNetmask(self.cfg, self.op.master_netmask)
3870 if self.op.diskparams:
3871 for dt_params in self.op.diskparams.values():
3872 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3873 try:
3874 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3875 except errors.OpPrereqError, err:
3876 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3879 def ExpandNames(self):
3880 # FIXME: in the future maybe other cluster params won't require checking on
3881 # all nodes to be modified.
3882 self.needed_locks = {
3883 locking.LEVEL_NODE: locking.ALL_SET,
3884 locking.LEVEL_INSTANCE: locking.ALL_SET,
3885 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3887 self.share_locks = {
3888 locking.LEVEL_NODE: 1,
3889 locking.LEVEL_INSTANCE: 1,
3890 locking.LEVEL_NODEGROUP: 1,
3893 def BuildHooksEnv(self):
3898 "OP_TARGET": self.cfg.GetClusterName(),
3899 "NEW_VG_NAME": self.op.vg_name,
3902 def BuildHooksNodes(self):
3903 """Build hooks nodes.
3906 mn = self.cfg.GetMasterNode()
3907 return ([mn], [mn])
3909 def CheckPrereq(self):
3910 """Check prerequisites.
3912 This checks whether the given params don't conflict and
3913 if the given volume group is valid.
3916 if self.op.vg_name is not None and not self.op.vg_name:
3917 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3918 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3919 " instances exist", errors.ECODE_INVAL)
3921 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3922 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3923 raise errors.OpPrereqError("Cannot disable drbd helper while"
3924 " drbd-based instances exist",
3927 node_list = self.owned_locks(locking.LEVEL_NODE)
3929 # if vg_name is not None, check the given volume group on all nodes
3930 if self.op.vg_name:
3931 vglist = self.rpc.call_vg_list(node_list)
3932 for node in node_list:
3933 msg = vglist[node].fail_msg
3934 if msg:
3935 # ignoring down node
3936 self.LogWarning("Error while gathering data on node %s"
3937 " (ignoring node): %s", node, msg)
3939 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3940 self.op.vg_name,
3941 constants.MIN_VG_SIZE)
3943 raise errors.OpPrereqError("Error on node '%s': %s" %
3944 (node, vgstatus), errors.ECODE_ENVIRON)
3946 if self.op.drbd_helper:
3947 # checks given drbd helper on all nodes
3948 helpers = self.rpc.call_drbd_helper(node_list)
3949 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3950 if ninfo.offline:
3951 self.LogInfo("Not checking drbd helper on offline node %s", node)
3952 continue
3953 msg = helpers[node].fail_msg
3954 if msg:
3955 raise errors.OpPrereqError("Error checking drbd helper on node"
3956 " '%s': %s" % (node, msg),
3957 errors.ECODE_ENVIRON)
3958 node_helper = helpers[node].payload
3959 if node_helper != self.op.drbd_helper:
3960 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3961 (node, node_helper), errors.ECODE_ENVIRON)
3963 self.cluster = cluster = self.cfg.GetClusterInfo()
3964 # validate params changes
3965 if self.op.beparams:
3966 objects.UpgradeBeParams(self.op.beparams)
3967 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3968 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3970 if self.op.ndparams:
3971 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3972 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3974 # TODO: we need a more general way to handle resetting
3975 # cluster-level parameters to default values
3976 if self.new_ndparams["oob_program"] == "":
3977 self.new_ndparams["oob_program"] = \
3978 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3980 if self.op.hv_state:
3981 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3982 self.cluster.hv_state_static)
3983 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3984 for hv, values in new_hv_state.items())
3986 if self.op.disk_state:
3987 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3988 self.cluster.disk_state_static)
3989 self.new_disk_state = \
3990 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3991 for name, values in svalues.items()))
3992 for storage, svalues in new_disk_state.items())
3994 if self.op.ipolicy:
3995 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3998 all_instances = self.cfg.GetAllInstancesInfo().values()
3999 violations = set()
4000 for group in self.cfg.GetAllNodeGroupsInfo().values():
4001 instances = frozenset([inst for inst in all_instances
4002 if compat.any(node in group.members
4003 for node in inst.all_nodes)])
4004 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4005 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4007 new_ipolicy, instances)
4009 violations.update(new)
4012 self.LogWarning("After the ipolicy change the following instances"
4013 " violate them: %s",
4014 utils.CommaJoin(utils.NiceSort(violations)))
4016 if self.op.nicparams:
4017 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4018 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4019 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4020 nic_errors = []
4022 # check all instances for consistency
4023 for instance in self.cfg.GetAllInstancesInfo().values():
4024 for nic_idx, nic in enumerate(instance.nics):
4025 params_copy = copy.deepcopy(nic.nicparams)
4026 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4028 # check parameter syntax
4029 try:
4030 objects.NIC.CheckParameterSyntax(params_filled)
4031 except errors.ConfigurationError, err:
4032 nic_errors.append("Instance %s, nic/%d: %s" %
4033 (instance.name, nic_idx, err))
4035 # if we're moving instances to routed, check that they have an ip
4036 target_mode = params_filled[constants.NIC_MODE]
4037 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4038 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4039 " address" % (instance.name, nic_idx))
4041 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4042 "\n".join(nic_errors))
4044 # hypervisor list/parameters
4045 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4046 if self.op.hvparams:
4047 for hv_name, hv_dict in self.op.hvparams.items():
4048 if hv_name not in self.new_hvparams:
4049 self.new_hvparams[hv_name] = hv_dict
4050 else:
4051 self.new_hvparams[hv_name].update(hv_dict)
4053 # disk template parameters
4054 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4055 if self.op.diskparams:
4056 for dt_name, dt_params in self.op.diskparams.items():
4057 if dt_name not in self.new_diskparams:
4058 self.new_diskparams[dt_name] = dt_params
4059 else:
4060 self.new_diskparams[dt_name].update(dt_params)
4062 # os hypervisor parameters
4063 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4064 if self.op.os_hvp:
4065 for os_name, hvs in self.op.os_hvp.items():
4066 if os_name not in self.new_os_hvp:
4067 self.new_os_hvp[os_name] = hvs
4069 for hv_name, hv_dict in hvs.items():
4070 if hv_name not in self.new_os_hvp[os_name]:
4071 self.new_os_hvp[os_name][hv_name] = hv_dict
4072 else:
4073 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4076 self.new_osp = objects.FillDict(cluster.osparams, {})
4077 if self.op.osparams:
4078 for os_name, osp in self.op.osparams.items():
4079 if os_name not in self.new_osp:
4080 self.new_osp[os_name] = {}
4082 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4083 use_none=True)
4085 if not self.new_osp[os_name]:
4086 # we removed all parameters
4087 del self.new_osp[os_name]
4088 else:
4089 # check the parameter validity (remote check)
4090 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4091 os_name, self.new_osp[os_name])
4093 # changes to the hypervisor list
4094 if self.op.enabled_hypervisors is not None:
4095 self.hv_list = self.op.enabled_hypervisors
4096 for hv in self.hv_list:
4097 # if the hypervisor doesn't already exist in the cluster
4098 # hvparams, we initialize it to empty, and then (in both
4099 # cases) we make sure to fill the defaults, as we might not
4100 # have a complete defaults list if the hypervisor wasn't
4101 # enabled before
4102 if hv not in new_hvp:
4103 new_hvp[hv] = {}
4104 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4105 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4106 else:
4107 self.hv_list = cluster.enabled_hypervisors
4109 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4110 # either the enabled list has changed, or the parameters have, validate
4111 for hv_name, hv_params in self.new_hvparams.items():
4112 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4113 (self.op.enabled_hypervisors and
4114 hv_name in self.op.enabled_hypervisors)):
4115 # either this is a new hypervisor, or its parameters have changed
4116 hv_class = hypervisor.GetHypervisor(hv_name)
4117 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4118 hv_class.CheckParameterSyntax(hv_params)
4119 _CheckHVParams(self, node_list, hv_name, hv_params)
4121 if self.op.os_hvp:
4122 # no need to check any newly-enabled hypervisors, since the
4123 # defaults have already been checked in the above code-block
4124 for os_name, os_hvp in self.new_os_hvp.items():
4125 for hv_name, hv_params in os_hvp.items():
4126 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4127 # we need to fill in the new os_hvp on top of the actual hv_p
4128 cluster_defaults = self.new_hvparams.get(hv_name, {})
4129 new_osp = objects.FillDict(cluster_defaults, hv_params)
4130 hv_class = hypervisor.GetHypervisor(hv_name)
4131 hv_class.CheckParameterSyntax(new_osp)
4132 _CheckHVParams(self, node_list, hv_name, new_osp)
4134 if self.op.default_iallocator:
4135 alloc_script = utils.FindFile(self.op.default_iallocator,
4136 constants.IALLOCATOR_SEARCH_PATH,
4137 os.path.isfile)
4138 if alloc_script is None:
4139 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4140 " specified" % self.op.default_iallocator,
4143 def Exec(self, feedback_fn):
4144 """Change the parameters of the cluster.
4147 if self.op.vg_name is not None:
4148 new_volume = self.op.vg_name
4151 if new_volume != self.cfg.GetVGName():
4152 self.cfg.SetVGName(new_volume)
4154 feedback_fn("Cluster LVM configuration already in desired"
4155 " state, not changing")
4156 if self.op.drbd_helper is not None:
4157 new_helper = self.op.drbd_helper
4160 if new_helper != self.cfg.GetDRBDHelper():
4161 self.cfg.SetDRBDHelper(new_helper)
4162 else:
4163 feedback_fn("Cluster DRBD helper already in desired state,"
4164 " not changing")
4165 if self.op.hvparams:
4166 self.cluster.hvparams = self.new_hvparams
4167 if self.op.os_hvp:
4168 self.cluster.os_hvp = self.new_os_hvp
4169 if self.op.enabled_hypervisors is not None:
4170 self.cluster.hvparams = self.new_hvparams
4171 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4172 if self.op.beparams:
4173 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4174 if self.op.nicparams:
4175 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4176 if self.op.ipolicy:
4177 self.cluster.ipolicy = self.new_ipolicy
4178 if self.op.osparams:
4179 self.cluster.osparams = self.new_osp
4180 if self.op.ndparams:
4181 self.cluster.ndparams = self.new_ndparams
4182 if self.op.diskparams:
4183 self.cluster.diskparams = self.new_diskparams
4184 if self.op.hv_state:
4185 self.cluster.hv_state_static = self.new_hv_state
4186 if self.op.disk_state:
4187 self.cluster.disk_state_static = self.new_disk_state
4189 if self.op.candidate_pool_size is not None:
4190 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4191 # we need to update the pool size here, otherwise the save will fail
4192 _AdjustCandidatePool(self, [])
4194 if self.op.maintain_node_health is not None:
4195 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4196 feedback_fn("Note: CONFD was disabled at build time, node health"
4197 " maintenance is not useful (still enabling it)")
4198 self.cluster.maintain_node_health = self.op.maintain_node_health
4200 if self.op.prealloc_wipe_disks is not None:
4201 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4203 if self.op.add_uids is not None:
4204 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4206 if self.op.remove_uids is not None:
4207 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4209 if self.op.uid_pool is not None:
4210 self.cluster.uid_pool = self.op.uid_pool
4212 if self.op.default_iallocator is not None:
4213 self.cluster.default_iallocator = self.op.default_iallocator
4215 if self.op.reserved_lvs is not None:
4216 self.cluster.reserved_lvs = self.op.reserved_lvs
4218 if self.op.use_external_mip_script is not None:
4219 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4221 def helper_os(aname, mods, desc):
4223 lst = getattr(self.cluster, aname)
4224 for key, val in mods:
4225 if key == constants.DDM_ADD:
4226 if val in lst:
4227 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4228 else:
4229 lst.append(val)
4230 elif key == constants.DDM_REMOVE:
4231 if val in lst:
4232 lst.remove(val)
4233 else:
4234 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4235 else:
4236 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4238 if self.op.hidden_os:
4239 helper_os("hidden_os", self.op.hidden_os, "hidden")
4241 if self.op.blacklisted_os:
4242 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4244 if self.op.master_netdev:
4245 master_params = self.cfg.GetMasterNetworkParameters()
4246 ems = self.cfg.GetUseExternalMipScript()
4247 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4248 self.cluster.master_netdev)
4249 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4251 result.Raise("Could not disable the master ip")
4252 feedback_fn("Changing master_netdev from %s to %s" %
4253 (master_params.netdev, self.op.master_netdev))
4254 self.cluster.master_netdev = self.op.master_netdev
4256 if self.op.master_netmask:
4257 master_params = self.cfg.GetMasterNetworkParameters()
4258 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4259 result = self.rpc.call_node_change_master_netmask(master_params.name,
4260 master_params.netmask,
4261 self.op.master_netmask,
4262 master_params.ip,
4263 master_params.netdev)
4264 if result.fail_msg:
4265 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4266 feedback_fn(msg)
4268 self.cluster.master_netmask = self.op.master_netmask
4270 self.cfg.Update(self.cluster, feedback_fn)
4272 if self.op.master_netdev:
4273 master_params = self.cfg.GetMasterNetworkParameters()
4274 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4275 self.op.master_netdev)
4276 ems = self.cfg.GetUseExternalMipScript()
4277 result = self.rpc.call_node_activate_master_ip(master_params.name,
4278 master_params, ems)
4279 if result.fail_msg:
4280 self.LogWarning("Could not re-enable the master ip on"
4281 " the master, please restart manually: %s",
4282 result.fail_msg)
4285 def _UploadHelper(lu, nodes, fname):
4286 """Helper for uploading a file and showing warnings.
4289 if os.path.exists(fname):
4290 result = lu.rpc.call_upload_file(nodes, fname)
4291 for to_node, to_result in result.items():
4292 msg = to_result.fail_msg
4293 if msg:
4294 msg = ("Copy of file %s to node %s failed: %s" %
4295 (fname, to_node, msg))
4296 lu.proc.LogWarning(msg)
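# Illustrative usage sketch (hypothetical arguments, added for clarity):
# callers pass a node list and an absolute file name, e.g.
#
#   _UploadHelper(self, self.cfg.GetOnlineNodeList(),
#                 constants.CONFD_HMAC_KEY)
#
# Note the behaviour documented above: a locally missing file is skipped
# silently, and per-node copy failures are only logged as warnings.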
4299 def _ComputeAncillaryFiles(cluster, redist):
4300 """Compute files external to Ganeti which need to be consistent.
4302 @type redist: boolean
4303 @param redist: Whether to include files which need to be redistributed
4306 # Compute files for all nodes
4307 files_all = set([
4308 constants.SSH_KNOWN_HOSTS_FILE,
4309 constants.CONFD_HMAC_KEY,
4310 constants.CLUSTER_DOMAIN_SECRET_FILE,
4311 constants.SPICE_CERT_FILE,
4312 constants.SPICE_CACERT_FILE,
4313 constants.RAPI_USERS_FILE,
4314 ])
4316 if not redist:
4317 files_all.update(constants.ALL_CERT_FILES)
4318 files_all.update(ssconf.SimpleStore().GetFileList())
4319 else:
4320 # we need to ship at least the RAPI certificate
4321 files_all.add(constants.RAPI_CERT_FILE)
4323 if cluster.modify_etc_hosts:
4324 files_all.add(constants.ETC_HOSTS)
4326 if cluster.use_external_mip_script:
4327 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4329 # Files which are optional, these must:
4330 # - be present in one other category as well
4331 # - either exist or not exist on all nodes of that category (mc, vm all)
4332 files_opt = set([
4333 constants.RAPI_USERS_FILE,
4334 ])
4336 # Files which should only be on master candidates
4337 files_mc = set()
4339 if not redist:
4340 files_mc.add(constants.CLUSTER_CONF_FILE)
4342 # Files which should only be on VM-capable nodes
4343 files_vm = set(filename
4344 for hv_name in cluster.enabled_hypervisors
4345 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4347 files_opt |= set(filename
4348 for hv_name in cluster.enabled_hypervisors
4349 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4351 # Filenames in each category must be unique
4352 all_files_set = files_all | files_mc | files_vm
4353 assert (len(all_files_set) ==
4354 sum(map(len, [files_all, files_mc, files_vm]))), \
4355 "Found file listed in more than one file list"
4357 # Optional files must be present in one other category
4358 assert all_files_set.issuperset(files_opt), \
4359 "Optional file not in a different required list"
4361 return (files_all, files_opt, files_mc, files_vm)
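# Illustrative sketch of how the four returned sets are typically consumed
# (mirrors _RedistributeAncillaryFiles below; variable names are assumptions):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   # files_all: every node, files_mc: master candidates only,
#   # files_vm: VM-capable nodes, files_opt: allowed to be absent everywhere.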
4364 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4365 """Distribute additional files which are part of the cluster configuration.
4367 ConfigWriter takes care of distributing the config and ssconf files, but
4368 there are more files which should be distributed to all nodes. This function
4369 makes sure those are copied.
4371 @param lu: calling logical unit
4372 @param additional_nodes: list of nodes not in the config to distribute to
4373 @type additional_vm: boolean
4374 @param additional_vm: whether the additional nodes are vm-capable or not
4377 # Gather target nodes
4378 cluster = lu.cfg.GetClusterInfo()
4379 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4381 online_nodes = lu.cfg.GetOnlineNodeList()
4382 online_set = frozenset(online_nodes)
4383 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4385 if additional_nodes is not None:
4386 online_nodes.extend(additional_nodes)
4387 if additional_vm:
4388 vm_nodes.extend(additional_nodes)
4390 # Never distribute to master node
4391 for nodelist in [online_nodes, vm_nodes]:
4392 if master_info.name in nodelist:
4393 nodelist.remove(master_info.name)
4396 (files_all, _, files_mc, files_vm) = \
4397 _ComputeAncillaryFiles(cluster, True)
4399 # Never re-distribute configuration file from here
4400 assert not (constants.CLUSTER_CONF_FILE in files_all or
4401 constants.CLUSTER_CONF_FILE in files_vm)
4402 assert not files_mc, "Master candidates not handled in this function"
4404 filemap = [
4405 (online_nodes, files_all),
4406 (vm_nodes, files_vm),
4407 ]
4410 for (node_list, files) in filemap:
4411 for fname in files:
4412 _UploadHelper(lu, node_list, fname)
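# Illustrative call sketch (hypothetical values), e.g. right after a new node
# has been added and should receive the ancillary files as well:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name],
#                               additional_vm=new_node.vm_capable)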
4415 class LUClusterRedistConf(NoHooksLU):
4416 """Force the redistribution of cluster configuration.
4418 This is a very simple LU.
4423 def ExpandNames(self):
4424 self.needed_locks = {
4425 locking.LEVEL_NODE: locking.ALL_SET,
4427 self.share_locks[locking.LEVEL_NODE] = 1
4429 def Exec(self, feedback_fn):
4430 """Redistribute the configuration.
4433 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4434 _RedistributeAncillaryFiles(self)
4437 class LUClusterActivateMasterIp(NoHooksLU):
4438 """Activate the master IP on the master node.
4441 def Exec(self, feedback_fn):
4442 """Activate the master IP.
4445 master_params = self.cfg.GetMasterNetworkParameters()
4446 ems = self.cfg.GetUseExternalMipScript()
4447 result = self.rpc.call_node_activate_master_ip(master_params.name,
4448 master_params, ems)
4449 result.Raise("Could not activate the master IP")
4452 class LUClusterDeactivateMasterIp(NoHooksLU):
4453 """Deactivate the master IP on the master node.
4456 def Exec(self, feedback_fn):
4457 """Deactivate the master IP.
4460 master_params = self.cfg.GetMasterNetworkParameters()
4461 ems = self.cfg.GetUseExternalMipScript()
4462 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4463 master_params, ems)
4464 result.Raise("Could not deactivate the master IP")
4467 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4468 """Sleep and poll for an instance's disk to sync.
4471 if not instance.disks or disks is not None and not disks:
4472 return True
4474 disks = _ExpandCheckDisks(instance, disks)
4477 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4479 node = instance.primary_node
4481 for dev in disks:
4482 lu.cfg.SetDiskID(dev, node)
4484 # TODO: Convert to utils.Retry
4487 degr_retries = 10 # in seconds, as we sleep 1 second each time
4488 while True:
4489 max_time = 0
4490 done = True
4491 cumul_degraded = False
4492 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4493 msg = rstats.fail_msg
4494 if msg:
4495 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4498 raise errors.RemoteError("Can't contact node %s for mirror data,"
4499 " aborting." % node)
4502 rstats = rstats.payload
4504 for i, mstat in enumerate(rstats):
4505 if mstat is None:
4506 lu.LogWarning("Can't compute data for node %s/%s",
4507 node, disks[i].iv_name)
4508 continue
4510 cumul_degraded = (cumul_degraded or
4511 (mstat.is_degraded and mstat.sync_percent is None))
4512 if mstat.sync_percent is not None:
4513 done = False
4514 if mstat.estimated_time is not None:
4515 rem_time = ("%s remaining (estimated)" %
4516 utils.FormatSeconds(mstat.estimated_time))
4517 max_time = mstat.estimated_time
4519 rem_time = "no time estimate"
4520 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4521 (disks[i].iv_name, mstat.sync_percent, rem_time))
4523 # if we're done but degraded, let's do a few small retries, to
4524 # make sure we see a stable and not transient situation; therefore
4525 # we force restart of the loop
4526 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4527 logging.info("Degraded disks found, %d retries left", degr_retries)
4528 degr_retries -= 1
4529 time.sleep(1)
4530 continue
4532 if done or oneshot:
4533 break
4535 time.sleep(min(60, max_time))
4538 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4539 return not cumul_degraded
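# Illustrative sketch (assumed caller, not from this section): a typical LU
# blocks until all of an instance's mirrored disks have converged, e.g.
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")
#
# whereas oneshot=True only reports the current status once instead of
# polling until the mirrors are in sync.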
4542 def _BlockdevFind(lu, node, dev, instance):
4543 """Wrapper around call_blockdev_find to annotate diskparams.
4545 @param lu: A reference to the lu object
4546 @param node: The node to call out
4547 @param dev: The device to find
4548 @param instance: The instance object the device belongs to
4549 @returns The result of the rpc call
4552 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4553 return lu.rpc.call_blockdev_find(node, disk)
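# Illustrative sketch (hypothetical variables): using the annotated wrapper
# instead of a bare call_blockdev_find ensures node-group disk parameters are
# applied before the RPC, e.g.
#
#   result = _BlockdevFind(self, node, dev, instance)
#   if not result.fail_msg and result.payload:
#     dev_status = result.payload  # per-device status returned by the node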
4556 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4557 """Wrapper around L{_CheckDiskConsistencyInner}.
4560 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4561 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4562 ldisk=ldisk)
4565 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4566 ldisk=False):
4567 """Check that mirrors are not degraded.
4569 @attention: The device has to be annotated already.
4571 The ldisk parameter, if True, will change the test from the
4572 is_degraded attribute (which represents overall non-ok status for
4573 the device(s)) to the ldisk (representing the local storage status).
4576 lu.cfg.SetDiskID(dev, node)
4578 result = True
4580 if on_primary or dev.AssembleOnSecondary():
4581 rstats = lu.rpc.call_blockdev_find(node, dev)
4582 msg = rstats.fail_msg
4583 if msg:
4584 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4585 result = False
4586 elif not rstats.payload:
4587 lu.LogWarning("Can't find disk on node %s", node)
4588 result = False
4589 else:
4590 if ldisk:
4591 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4592 else:
4593 result = result and not rstats.payload.is_degraded
4595 if dev.children:
4596 for child in dev.children:
4597 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4598 on_primary)
4600 return result
4603 class LUOobCommand(NoHooksLU):
4604 """Logical unit for OOB handling.
4608 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4610 def ExpandNames(self):
4611 """Gather locks we need.
4614 if self.op.node_names:
4615 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4616 lock_names = self.op.node_names
4618 lock_names = locking.ALL_SET
4620 self.needed_locks = {
4621 locking.LEVEL_NODE: lock_names,
4624 def CheckPrereq(self):
4625 """Check prerequisites.
4628 - the node exists in the configuration
4631 Any errors are signaled by raising errors.OpPrereqError.
4634 self.nodes = []
4635 self.master_node = self.cfg.GetMasterNode()
4637 assert self.op.power_delay >= 0.0
4639 if self.op.node_names:
4640 if (self.op.command in self._SKIP_MASTER and
4641 self.master_node in self.op.node_names):
4642 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4643 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4645 if master_oob_handler:
4646 additional_text = ("run '%s %s %s' if you want to operate on the"
4647 " master regardless") % (master_oob_handler,
4651 additional_text = "it does not support out-of-band operations"
4653 raise errors.OpPrereqError(("Operating on the master node %s is not"
4654 " allowed for %s; %s") %
4655 (self.master_node, self.op.command,
4656 additional_text), errors.ECODE_INVAL)
4657 else:
4658 self.op.node_names = self.cfg.GetNodeList()
4659 if self.op.command in self._SKIP_MASTER:
4660 self.op.node_names.remove(self.master_node)
4662 if self.op.command in self._SKIP_MASTER:
4663 assert self.master_node not in self.op.node_names
4665 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4667 raise errors.OpPrereqError("Node %s not found" % node_name,
4669 else:
4670 self.nodes.append(node)
4672 if (not self.op.ignore_status and
4673 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4674 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4675 " not marked offline") % node_name,
4678 def Exec(self, feedback_fn):
4679 """Execute OOB and return result if we expect any.
4682 master_node = self.master_node
4683 ret = []
4685 for idx, node in enumerate(utils.NiceSort(self.nodes,
4686 key=lambda node: node.name)):
4687 node_entry = [(constants.RS_NORMAL, node.name)]
4688 ret.append(node_entry)
4690 oob_program = _SupportsOob(self.cfg, node)
4692 if not oob_program:
4693 node_entry.append((constants.RS_UNAVAIL, None))
4694 continue
4696 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4697 self.op.command, oob_program, node.name)
4698 result = self.rpc.call_run_oob(master_node, oob_program,
4699 self.op.command, node.name,
4700 self.op.timeout)
4702 if result.fail_msg:
4703 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4704 node.name, result.fail_msg)
4705 node_entry.append((constants.RS_NODATA, None))
4706 else:
4707 try:
4708 self._CheckPayload(result)
4709 except errors.OpExecError, err:
4710 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4711 node.name, err)
4712 node_entry.append((constants.RS_NODATA, None))
4713 else:
4714 if self.op.command == constants.OOB_HEALTH:
4715 # For health we should log important events
4716 for item, status in result.payload:
4717 if status in [constants.OOB_STATUS_WARNING,
4718 constants.OOB_STATUS_CRITICAL]:
4719 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4720 item, node.name, status)
4722 if self.op.command == constants.OOB_POWER_ON:
4723 node.powered = True
4724 elif self.op.command == constants.OOB_POWER_OFF:
4725 node.powered = False
4726 elif self.op.command == constants.OOB_POWER_STATUS:
4727 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4728 if powered != node.powered:
4729 logging.warning(("Recorded power state (%s) of node '%s' does not"
4730 " match actual power state (%s)"), node.powered,
4733 # For configuration changing commands we should update the node
4734 if self.op.command in (constants.OOB_POWER_ON,
4735 constants.OOB_POWER_OFF):
4736 self.cfg.Update(node, feedback_fn)
4738 node_entry.append((constants.RS_NORMAL, result.payload))
4740 if (self.op.command == constants.OOB_POWER_ON and
4741 idx < len(self.nodes) - 1):
4742 time.sleep(self.op.power_delay)
4744 return ret
4746 def _CheckPayload(self, result):
4747 """Checks if the payload is valid.
4749 @param result: RPC result
4750 @raises errors.OpExecError: If payload is not valid
4753 errs = []
4754 if self.op.command == constants.OOB_HEALTH:
4755 if not isinstance(result.payload, list):
4756 errs.append("command 'health' is expected to return a list but got %s" %
4757 type(result.payload))
4758 else:
4759 for item, status in result.payload:
4760 if status not in constants.OOB_STATUSES:
4761 errs.append("health item '%s' has invalid status '%s'" %
4764 if self.op.command == constants.OOB_POWER_STATUS:
4765 if not isinstance(result.payload, dict):
4766 errs.append("power-status is expected to return a dict but got %s" %
4767 type(result.payload))
4769 if self.op.command in [
4770 constants.OOB_POWER_ON,
4771 constants.OOB_POWER_OFF,
4772 constants.OOB_POWER_CYCLE,
4774 if result.payload is not None:
4775 errs.append("%s is expected to not return payload but got '%s'" %
4776 (self.op.command, result.payload))
4779 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4780 utils.CommaJoin(errs))
4783 class _OsQuery(_QueryBase):
4784 FIELDS = query.OS_FIELDS
4786 def ExpandNames(self, lu):
4787 # Lock all nodes in shared mode
4788 # Temporary removal of locks, should be reverted later
4789 # TODO: reintroduce locks when they are lighter-weight
4790 lu.needed_locks = {}
4791 #self.share_locks[locking.LEVEL_NODE] = 1
4792 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4794 # The following variables interact with _QueryBase._GetNames
4795 if self.names:
4796 self.wanted = self.names
4797 else:
4798 self.wanted = locking.ALL_SET
4800 self.do_locking = self.use_locking
4802 def DeclareLocks(self, lu, level):
4803 pass
4805 @staticmethod
4806 def _DiagnoseByOS(rlist):
4807 """Remaps a per-node return list into an a per-os per-node dictionary
4809 @param rlist: a map with node names as keys and OS objects as values
4812 @return: a dictionary with osnames as keys and as value another
4813 map, with nodes as keys and tuples of (path, status, diagnose,
4814 variants, parameters, api_versions) as values, eg::
4816 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4817 (/srv/..., False, "invalid api")],
4818 "node2": [(/srv/..., True, "", [], [])]}
4822 all_os = {}
4823 # we build here the list of nodes that didn't fail the RPC (at RPC
4824 # level), so that nodes with a non-responding node daemon don't
4825 # make all OSes invalid
4826 good_nodes = [node_name for node_name in rlist
4827 if not rlist[node_name].fail_msg]
4828 for node_name, nr in rlist.items():
4829 if nr.fail_msg or not nr.payload:
4830 continue
4831 for (name, path, status, diagnose, variants,
4832 params, api_versions) in nr.payload:
4833 if name not in all_os:
4834 # build a list of nodes for this os containing empty lists
4835 # for each node in node_list
4836 all_os[name] = {}
4837 for nname in good_nodes:
4838 all_os[name][nname] = []
4839 # convert params from [name, help] to (name, help)
4840 params = [tuple(v) for v in params]
4841 all_os[name][node_name].append((path, status, diagnose,
4842 variants, params, api_versions))
4843 return all_os
4845 def _GetQueryData(self, lu):
4846 """Computes the list of nodes and their attributes.
4849 # Locking is not used
4850 assert not (compat.any(lu.glm.is_owned(level)
4851 for level in locking.LEVELS
4852 if level != locking.LEVEL_CLUSTER) or
4853 self.do_locking or self.use_locking)
4855 valid_nodes = [node.name
4856 for node in lu.cfg.GetAllNodesInfo().values()
4857 if not node.offline and node.vm_capable]
4858 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4859 cluster = lu.cfg.GetClusterInfo()
4861 data = {}
4863 for (os_name, os_data) in pol.items():
4864 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4865 hidden=(os_name in cluster.hidden_os),
4866 blacklisted=(os_name in cluster.blacklisted_os))
4868 variants = set()
4869 parameters = set()
4870 api_versions = set()
4872 for idx, osl in enumerate(os_data.values()):
4873 info.valid = bool(info.valid and osl and osl[0][1])
4874 if not info.valid:
4875 break
4877 (node_variants, node_params, node_api) = osl[0][3:6]
4878 if idx == 0:
4879 # first entry
4880 variants.update(node_variants)
4881 parameters.update(node_params)
4882 api_versions.update(node_api)
4883 else:
4884 # Filter out inconsistent values
4885 variants.intersection_update(node_variants)
4886 parameters.intersection_update(node_params)
4887 api_versions.intersection_update(node_api)
4889 info.variants = list(variants)
4890 info.parameters = list(parameters)
4891 info.api_versions = list(api_versions)
4893 data[os_name] = info
4895 # Prepare data in requested order
4896 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4897 if name in data]
4900 class LUOsDiagnose(NoHooksLU):
4901 """Logical unit for OS diagnose/query.
4906 @staticmethod
4907 def _BuildFilter(fields, names):
4908 """Builds a filter for querying OSes.
4911 name_filter = qlang.MakeSimpleFilter("name", names)
4913 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4914 # respective field is not requested
4915 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4916 for fname in ["hidden", "blacklisted"]
4917 if fname not in fields]
4918 if "valid" not in fields:
4919 status_filter.append([qlang.OP_TRUE, "valid"])
4921 if status_filter:
4922 status_filter.insert(0, qlang.OP_AND)
4923 else:
4924 status_filter = None
4926 if name_filter and status_filter:
4927 return [qlang.OP_AND, name_filter, status_filter]
4928 elif name_filter:
4929 return name_filter
4930 else:
4931 return status_filter
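# Illustrative sketch of the filter produced above (assuming "hidden",
# "blacklisted" and "valid" are not among the requested fields):
#
#   _BuildFilter(["name", "variants"], ["debian-8"])
#   # -> [qlang.OP_AND, <name filter from qlang.MakeSimpleFilter>,
#   #     [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#   #                    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#   #                    [qlang.OP_TRUE, "valid"]]]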
4933 def CheckArguments(self):
4934 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4935 self.op.output_fields, False)
4937 def ExpandNames(self):
4938 self.oq.ExpandNames(self)
4940 def Exec(self, feedback_fn):
4941 return self.oq.OldStyleQuery(self)
4944 class LUNodeRemove(LogicalUnit):
4945 """Logical unit for removing a node.
4948 HPATH = "node-remove"
4949 HTYPE = constants.HTYPE_NODE
4951 def BuildHooksEnv(self):
4956 "OP_TARGET": self.op.node_name,
4957 "NODE_NAME": self.op.node_name,
4960 def BuildHooksNodes(self):
4961 """Build hooks nodes.
4963 This doesn't run on the target node in the pre phase as a failed
4964 node would then be impossible to remove.
4967 all_nodes = self.cfg.GetNodeList()
4968 try:
4969 all_nodes.remove(self.op.node_name)
4970 except ValueError:
4971 pass
4972 return (all_nodes, all_nodes)
4974 def CheckPrereq(self):
4975 """Check prerequisites.
4978 - the node exists in the configuration
4979 - it does not have primary or secondary instances
4980 - it's not the master
4982 Any errors are signaled by raising errors.OpPrereqError.
4985 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4986 node = self.cfg.GetNodeInfo(self.op.node_name)
4987 assert node is not None
4989 masternode = self.cfg.GetMasterNode()
4990 if node.name == masternode:
4991 raise errors.OpPrereqError("Node is the master node, failover to another"
4992 " node is required", errors.ECODE_INVAL)
4994 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4995 if node.name in instance.all_nodes:
4996 raise errors.OpPrereqError("Instance %s is still running on the node,"
4997 " please remove first" % instance_name,
4999 self.op.node_name = node.name
5000 self.node = node
5002 def Exec(self, feedback_fn):
5003 """Removes the node from the cluster.
5007 logging.info("Stopping the node daemon and removing configs from node %s",
5010 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5012 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5013 "Not owning BGL"
5015 # Promote nodes to master candidate as needed
5016 _AdjustCandidatePool(self, exceptions=[node.name])
5017 self.context.RemoveNode(node.name)
5019 # Run post hooks on the node before it's removed
5020 _RunPostHook(self, node.name)
5022 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5023 msg = result.fail_msg
5025 self.LogWarning("Errors encountered on the remote node while leaving"
5026 " the cluster: %s", msg)
5028 # Remove node from our /etc/hosts
5029 if self.cfg.GetClusterInfo().modify_etc_hosts:
5030 master_node = self.cfg.GetMasterNode()
5031 result = self.rpc.call_etc_hosts_modify(master_node,
5032 constants.ETC_HOSTS_REMOVE,
5033 node.name, None)
5034 result.Raise("Can't update hosts file with new host data")
5035 _RedistributeAncillaryFiles(self)
5038 class _NodeQuery(_QueryBase):
5039 FIELDS = query.NODE_FIELDS
5041 def ExpandNames(self, lu):
5042 lu.needed_locks = {}
5043 lu.share_locks = _ShareAll()
5045 if self.names:
5046 self.wanted = _GetWantedNodes(lu, self.names)
5047 else:
5048 self.wanted = locking.ALL_SET
5050 self.do_locking = (self.use_locking and
5051 query.NQ_LIVE in self.requested_data)
5053 if self.do_locking:
5054 # If any non-static field is requested we need to lock the nodes
5055 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5057 def DeclareLocks(self, lu, level):
5058 pass
5060 def _GetQueryData(self, lu):
5061 """Computes the list of nodes and their attributes.
5064 all_info = lu.cfg.GetAllNodesInfo()
5066 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5068 # Gather data as requested
5069 if query.NQ_LIVE in self.requested_data:
5070 # filter out non-vm_capable nodes
5071 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5073 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5074 [lu.cfg.GetHypervisorType()])
5075 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5076 for (name, nresult) in node_data.items()
5077 if not nresult.fail_msg and nresult.payload)
5078 else:
5079 live_data = None
5081 if query.NQ_INST in self.requested_data:
5082 node_to_primary = dict([(name, set()) for name in nodenames])
5083 node_to_secondary = dict([(name, set()) for name in nodenames])
5085 inst_data = lu.cfg.GetAllInstancesInfo()
5087 for inst in inst_data.values():
5088 if inst.primary_node in node_to_primary:
5089 node_to_primary[inst.primary_node].add(inst.name)
5090 for secnode in inst.secondary_nodes:
5091 if secnode in node_to_secondary:
5092 node_to_secondary[secnode].add(inst.name)
5093 else:
5094 node_to_primary = None
5095 node_to_secondary = None
5097 if query.NQ_OOB in self.requested_data:
5098 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5099 for name, node in all_info.iteritems())
5100 else:
5101 oob_support = None
5103 if query.NQ_GROUP in self.requested_data:
5104 groups = lu.cfg.GetAllNodeGroupsInfo()
5105 else:
5106 groups = {}
5108 return query.NodeQueryData([all_info[name] for name in nodenames],
5109 live_data, lu.cfg.GetMasterNode(),
5110 node_to_primary, node_to_secondary, groups,
5111 oob_support, lu.cfg.GetClusterInfo())
5114 class LUNodeQuery(NoHooksLU):
5115 """Logical unit for querying nodes.
5118 # pylint: disable=W0142
5121 def CheckArguments(self):
5122 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5123 self.op.output_fields, self.op.use_locking)
5125 def ExpandNames(self):
5126 self.nq.ExpandNames(self)
5128 def DeclareLocks(self, level):
5129 self.nq.DeclareLocks(self, level)
5131 def Exec(self, feedback_fn):
5132 return self.nq.OldStyleQuery(self)
5135 class LUNodeQueryvols(NoHooksLU):
5136 """Logical unit for getting volumes on node(s).
5140 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5141 _FIELDS_STATIC = utils.FieldSet("node")
5143 def CheckArguments(self):
5144 _CheckOutputFields(static=self._FIELDS_STATIC,
5145 dynamic=self._FIELDS_DYNAMIC,
5146 selected=self.op.output_fields)
5148 def ExpandNames(self):
5149 self.share_locks = _ShareAll()
5150 self.needed_locks = {}
5152 if not self.op.nodes:
5153 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5155 self.needed_locks[locking.LEVEL_NODE] = \
5156 _GetWantedNodes(self, self.op.nodes)
5158 def Exec(self, feedback_fn):
5159 """Computes the list of nodes and their attributes.
5162 nodenames = self.owned_locks(locking.LEVEL_NODE)
5163 volumes = self.rpc.call_node_volumes(nodenames)
5165 ilist = self.cfg.GetAllInstancesInfo()
5166 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5168 output = []
5169 for node in nodenames:
5170 nresult = volumes[node]
5171 if nresult.offline:
5172 continue
5173 msg = nresult.fail_msg
5174 if msg:
5175 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5176 continue
5178 node_vols = sorted(nresult.payload,
5179 key=operator.itemgetter("dev"))
5181 for vol in node_vols:
5182 node_output = []
5183 for field in self.op.output_fields:
5184 if field == "node":
5185 val = node
5186 elif field == "phys":
5187 val = vol["dev"]
5190 elif field == "name":
5191 val = vol["name"]
5192 elif field == "size":
5193 val = int(float(vol["size"]))
5194 elif field == "instance":
5195 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5197 raise errors.ParameterError(field)
5198 node_output.append(str(val))
5200 output.append(node_output)
5202 return output
5205 class LUNodeQueryStorage(NoHooksLU):
5206 """Logical unit for getting information on storage units on node(s).
5209 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5212 def CheckArguments(self):
5213 _CheckOutputFields(static=self._FIELDS_STATIC,
5214 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5215 selected=self.op.output_fields)
5217 def ExpandNames(self):
5218 self.share_locks = _ShareAll()
5219 self.needed_locks = {}
5221 if self.op.nodes:
5222 self.needed_locks[locking.LEVEL_NODE] = \
5223 _GetWantedNodes(self, self.op.nodes)
5224 else:
5225 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5227 def Exec(self, feedback_fn):
5228 """Computes the list of nodes and their attributes.
5231 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5233 # Always get name to sort by
5234 if constants.SF_NAME in self.op.output_fields:
5235 fields = self.op.output_fields[:]
5236 else:
5237 fields = [constants.SF_NAME] + self.op.output_fields
5239 # Never ask for node or type as it's only known to the LU
5240 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5241 while extra in fields:
5242 fields.remove(extra)
5244 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5245 name_idx = field_idx[constants.SF_NAME]
5247 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5248 data = self.rpc.call_storage_list(self.nodes,
5249 self.op.storage_type, st_args,
5250 self.op.name, fields)
5254 for node in utils.NiceSort(self.nodes):
5255 nresult = data[node]
5256 if nresult.offline:
5257 continue
5259 msg = nresult.fail_msg
5260 if msg:
5261 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5262 continue
5264 rows = dict([(row[name_idx], row) for row in nresult.payload])
5266 for name in utils.NiceSort(rows.keys()):
5267 row = rows[name]
5269 out = []
5271 for field in self.op.output_fields:
5272 if field == constants.SF_NODE:
5273 val = node
5274 elif field == constants.SF_TYPE:
5275 val = self.op.storage_type
5276 elif field in field_idx:
5277 val = row[field_idx[field]]
5278 else:
5279 raise errors.ParameterError(field)
5288 class _InstanceQuery(_QueryBase):
5289 FIELDS = query.INSTANCE_FIELDS
5291 def ExpandNames(self, lu):
5292 lu.needed_locks = {}
5293 lu.share_locks = _ShareAll()
5295 if self.names:
5296 self.wanted = _GetWantedInstances(lu, self.names)
5297 else:
5298 self.wanted = locking.ALL_SET
5300 self.do_locking = (self.use_locking and
5301 query.IQ_LIVE in self.requested_data)
5302 if self.do_locking:
5303 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5304 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5305 lu.needed_locks[locking.LEVEL_NODE] = []
5306 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5308 self.do_grouplocks = (self.do_locking and
5309 query.IQ_NODES in self.requested_data)
5311 def DeclareLocks(self, lu, level):
5313 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5314 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5316 # Lock all groups used by instances optimistically; this requires going
5317 # via the node before it's locked, requiring verification later on
5318 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5319 set(group_uuid
5320 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5321 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5322 elif level == locking.LEVEL_NODE:
5323 lu._LockInstancesNodes() # pylint: disable=W0212
5325 @staticmethod
5326 def _CheckGroupLocks(lu):
5327 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5328 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5330 # Check if node groups for locked instances are still correct
5331 for instance_name in owned_instances:
5332 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5334 def _GetQueryData(self, lu):
5335 """Computes the list of instances and their attributes.
5338 if self.do_grouplocks:
5339 self._CheckGroupLocks(lu)
5341 cluster = lu.cfg.GetClusterInfo()
5342 all_info = lu.cfg.GetAllInstancesInfo()
5344 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5346 instance_list = [all_info[name] for name in instance_names]
5347 nodes = frozenset(itertools.chain(*(inst.all_nodes
5348 for inst in instance_list)))
5349 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5350 bad_nodes = []
5351 offline_nodes = []
5352 wrongnode_inst = set()
5354 # Gather data as requested
5355 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5357 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5359 result = node_data[name]
5361 # offline nodes will be in both lists
5362 assert result.fail_msg
5363 offline_nodes.append(name)
5364 if result.fail_msg:
5365 bad_nodes.append(name)
5366 elif result.payload:
5367 for inst in result.payload:
5368 if inst in all_info:
5369 if all_info[inst].primary_node == name:
5370 live_data.update(result.payload)
5371 else:
5372 wrongnode_inst.add(inst)
5374 # orphan instance; we don't list it here as we don't
5375 # handle this case yet in the output of instance listing
5376 logging.warning("Orphan instance '%s' found on node %s",
5378 # else no instance is alive
5382 if query.IQ_DISKUSAGE in self.requested_data:
5383 disk_usage = dict((inst.name,
5384 _ComputeDiskSize(inst.disk_template,
5385 [{constants.IDISK_SIZE: disk.size}
5386 for disk in inst.disks]))
5387 for inst in instance_list)
5388 else:
5389 disk_usage = None
5391 if query.IQ_CONSOLE in self.requested_data:
5392 consinfo = {}
5393 for inst in instance_list:
5394 if inst.name in live_data:
5395 # Instance is running
5396 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5397 else:
5398 consinfo[inst.name] = None
5399 assert set(consinfo.keys()) == set(instance_names)
5400 else:
5401 consinfo = None
5403 if query.IQ_NODES in self.requested_data:
5404 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5405 instance_list)))
5406 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5407 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5408 for uuid in set(map(operator.attrgetter("group"),
5414 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5415 disk_usage, offline_nodes, bad_nodes,
5416 live_data, wrongnode_inst, consinfo,
5417 nodes, groups)
5420 class LUQuery(NoHooksLU):
5421 """Query for resources/items of a certain kind.
5424 # pylint: disable=W0142
5427 def CheckArguments(self):
5428 qcls = _GetQueryImplementation(self.op.what)
5430 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5432 def ExpandNames(self):
5433 self.impl.ExpandNames(self)
5435 def DeclareLocks(self, level):
5436 self.impl.DeclareLocks(self, level)
5438 def Exec(self, feedback_fn):
5439 return self.impl.NewStyleQuery(self)
5442 class LUQueryFields(NoHooksLU):
5443 """Query for resources/items of a certain kind.
5446 # pylint: disable=W0142
5449 def CheckArguments(self):
5450 self.qcls = _GetQueryImplementation(self.op.what)
5452 def ExpandNames(self):
5453 self.needed_locks = {}
5455 def Exec(self, feedback_fn):
5456 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5459 class LUNodeModifyStorage(NoHooksLU):
5460 """Logical unit for modifying a storage volume on a node.
5465 def CheckArguments(self):
5466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5468 storage_type = self.op.storage_type
5470 try:
5471 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5472 except KeyError:
5473 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5474 " modified" % storage_type,
5477 diff = set(self.op.changes.keys()) - modifiable
5479 raise errors.OpPrereqError("The following fields can not be modified for"
5480 " storage units of type '%s': %r" %
5481 (storage_type, list(diff)),
5484 def ExpandNames(self):
5485 self.needed_locks = {
5486 locking.LEVEL_NODE: self.op.node_name,
5489 def Exec(self, feedback_fn):
5490 """Computes the list of nodes and their attributes.
5493 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5494 result = self.rpc.call_storage_modify(self.op.node_name,
5495 self.op.storage_type, st_args,
5496 self.op.name, self.op.changes)
5497 result.Raise("Failed to modify storage unit '%s' on %s" %
5498 (self.op.name, self.op.node_name))
5501 class LUNodeAdd(LogicalUnit):
5502 """Logical unit for adding node to the cluster.
5505 HPATH = "node-add"
5506 HTYPE = constants.HTYPE_NODE
5507 _NFLAGS = ["master_capable", "vm_capable"]
5509 def CheckArguments(self):
5510 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5511 # validate/normalize the node name
5512 self.hostname = netutils.GetHostname(name=self.op.node_name,
5513 family=self.primary_ip_family)
5514 self.op.node_name = self.hostname.name
5516 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5517 raise errors.OpPrereqError("Cannot readd the master node",
5520 if self.op.readd and self.op.group:
5521 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5522 " being readded", errors.ECODE_INVAL)
5524 def BuildHooksEnv(self):
5527 This will run on all nodes before, and on all nodes + the new node after.
5531 "OP_TARGET": self.op.node_name,
5532 "NODE_NAME": self.op.node_name,
5533 "NODE_PIP": self.op.primary_ip,
5534 "NODE_SIP": self.op.secondary_ip,
5535 "MASTER_CAPABLE": str(self.op.master_capable),
5536 "VM_CAPABLE": str(self.op.vm_capable),
5539 def BuildHooksNodes(self):
5540 """Build hooks nodes.
5543 # Exclude added node
5544 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5545 post_nodes = pre_nodes + [self.op.node_name, ]
5547 return (pre_nodes, post_nodes)
5549 def CheckPrereq(self):
5550 """Check prerequisites.
5553 - the new node is not already in the config
5555 - its parameters (single/dual homed) match the cluster
5557 Any errors are signaled by raising errors.OpPrereqError.
5560 cfg = self.cfg
5561 hostname = self.hostname
5562 node = hostname.name
5563 primary_ip = self.op.primary_ip = hostname.ip
5564 if self.op.secondary_ip is None:
5565 if self.primary_ip_family == netutils.IP6Address.family:
5566 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5567 " IPv4 address must be given as secondary",
5569 self.op.secondary_ip = primary_ip
5571 secondary_ip = self.op.secondary_ip
5572 if not netutils.IP4Address.IsValid(secondary_ip):
5573 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5574 " address" % secondary_ip, errors.ECODE_INVAL)
5576 node_list = cfg.GetNodeList()
5577 if not self.op.readd and node in node_list:
5578 raise errors.OpPrereqError("Node %s is already in the configuration" %
5579 node, errors.ECODE_EXISTS)
5580 elif self.op.readd and node not in node_list:
5581 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5584 self.changed_primary_ip = False
5586 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5587 if self.op.readd and node == existing_node_name:
5588 if existing_node.secondary_ip != secondary_ip:
5589 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5590 " address configuration as before",
5592 if existing_node.primary_ip != primary_ip:
5593 self.changed_primary_ip = True
5595 continue
5597 if (existing_node.primary_ip == primary_ip or
5598 existing_node.secondary_ip == primary_ip or
5599 existing_node.primary_ip == secondary_ip or
5600 existing_node.secondary_ip == secondary_ip):
5601 raise errors.OpPrereqError("New node ip address(es) conflict with"
5602 " existing node %s" % existing_node.name,
5603 errors.ECODE_NOTUNIQUE)
5605 # After this 'if' block, None is no longer a valid value for the
5606 # _capable op attributes
5607 if self.op.readd:
5608 old_node = self.cfg.GetNodeInfo(node)
5609 assert old_node is not None, "Can't retrieve locked node %s" % node
5610 for attr in self._NFLAGS:
5611 if getattr(self.op, attr) is None:
5612 setattr(self.op, attr, getattr(old_node, attr))
5613 else:
5614 for attr in self._NFLAGS:
5615 if getattr(self.op, attr) is None:
5616 setattr(self.op, attr, True)
5618 if self.op.readd and not self.op.vm_capable:
5619 pri, sec = cfg.GetNodeInstances(node)
5621 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5622 " flag set to false, but it already holds"
5623 " instances" % node,
5626 # check that the type of the node (single versus dual homed) is the
5627 # same as for the master
5628 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5629 master_singlehomed = myself.secondary_ip == myself.primary_ip
5630 newbie_singlehomed = secondary_ip == primary_ip
5631 if master_singlehomed != newbie_singlehomed:
5632 if master_singlehomed:
5633 raise errors.OpPrereqError("The master has no secondary ip but the"
5634 " new node has one",
5637 raise errors.OpPrereqError("The master has a secondary ip but the"
5638 " new node doesn't have one",
5641 # checks reachability
5642 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5643 raise errors.OpPrereqError("Node not reachable by ping",
5644 errors.ECODE_ENVIRON)
5646 if not newbie_singlehomed:
5647 # check reachability from my secondary ip to newbie's secondary ip
5648 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5649 source=myself.secondary_ip):
5650 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651 " based ping to node daemon port",
5652 errors.ECODE_ENVIRON)
5659 if self.op.master_capable:
5660 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5662 self.master_candidate = False
5664 if self.op.readd:
5665 self.new_node = old_node
5666 else:
5667 node_group = cfg.LookupNodeGroup(self.op.group)
5668 self.new_node = objects.Node(name=node,
5669 primary_ip=primary_ip,
5670 secondary_ip=secondary_ip,
5671 master_candidate=self.master_candidate,
5672 offline=False, drained=False,
5673 group=node_group, ndparams={})
5675 if self.op.ndparams:
5676 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5678 if self.op.hv_state:
5679 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5681 if self.op.disk_state:
5682 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5684 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5685 # it a property on the base class.
5686 result = rpc.DnsOnlyRunner().call_version([node])[node]
5687 result.Raise("Can't get version information from node %s" % node)
5688 if constants.PROTOCOL_VERSION == result.payload:
5689 logging.info("Communication to node %s fine, sw version %s match",
5690 node, result.payload)
5691 else:
5692 raise errors.OpPrereqError("Version mismatch master version %s,"
5693 " node version %s" %
5694 (constants.PROTOCOL_VERSION, result.payload),
5695 errors.ECODE_ENVIRON)
5697 def Exec(self, feedback_fn):
5698 """Adds the new node to the cluster.
5701 new_node = self.new_node
5702 node = new_node.name
5704 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5705 "Not owning BGL"
5707 # We are adding a new node, so we assume it is powered
5708 new_node.powered = True
5710 # for re-adds, reset the offline/drained/master-candidate flags;
5711 # we need to reset here, otherwise offline would prevent RPC calls
5712 # later in the procedure; this also means that if the re-add
5713 # fails, we are left with a non-offlined, broken node
5714 if self.op.readd:
5715 new_node.drained = new_node.offline = False # pylint: disable=W0201
5716 self.LogInfo("Readding a node, the offline/drained flags were reset")
5717 # if we demote the node, we do cleanup later in the procedure
5718 new_node.master_candidate = self.master_candidate
5719 if self.changed_primary_ip:
5720 new_node.primary_ip = self.op.primary_ip
5722 # copy the master/vm_capable flags
5723 for attr in self._NFLAGS:
5724 setattr(new_node, attr, getattr(self.op, attr))
5726 # notify the user about any possible mc promotion
5727 if new_node.master_candidate:
5728 self.LogInfo("Node will be a master candidate")
5730 if self.op.ndparams:
5731 new_node.ndparams = self.op.ndparams
5732 else:
5733 new_node.ndparams = {}
5735 if self.op.hv_state:
5736 new_node.hv_state_static = self.new_hv_state
5738 if self.op.disk_state:
5739 new_node.disk_state_static = self.new_disk_state
5741 # Add node to our /etc/hosts, and add key to known_hosts
5742 if self.cfg.GetClusterInfo().modify_etc_hosts:
5743 master_node = self.cfg.GetMasterNode()
5744 result = self.rpc.call_etc_hosts_modify(master_node,
5745 constants.ETC_HOSTS_ADD,
5746 self.hostname.name, self.hostname.ip)
5748 result.Raise("Can't update hosts file with new host data")
5750 if new_node.secondary_ip != new_node.primary_ip:
5751 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5752 False)
5754 node_verify_list = [self.cfg.GetMasterNode()]
5755 node_verify_param = {
5756 constants.NV_NODELIST: ([node], {}),
5757 # TODO: do a node-net-test as well?
5758 }
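# Note: this asks the master node to verify that it can reach the new node
# over SSH and that hostnames resolve consistently (NV_NODELIST); any
# failure is reported per node via feedback_fn below.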
5760 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5761 self.cfg.GetClusterName())
5762 for verifier in node_verify_list:
5763 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5764 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5765 if nl_payload:
5766 for failed in nl_payload:
5767 feedback_fn("ssh/hostname verification failed"
5768 " (checking from %s): %s" %
5769 (verifier, nl_payload[failed]))
5770 raise errors.OpExecError("ssh/hostname verification failed")
5772 if self.op.readd:
5773 _RedistributeAncillaryFiles(self)
5774 self.context.ReaddNode(new_node)
5775 # make sure we redistribute the config
5776 self.cfg.Update(new_node, feedback_fn)
5777 # and make sure the new node will not have old files around
5778 if not new_node.master_candidate:
5779 result = self.rpc.call_node_demote_from_mc(new_node.name)
5780 msg = result.fail_msg
5782 self.LogWarning("Node failed to demote itself from master"
5783 " candidate status: %s" % msg)
5784 else:
5785 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5786 additional_vm=self.op.vm_capable)
5787 self.context.AddNode(new_node, self.proc.GetECId())
5790 class LUNodeSetParams(LogicalUnit):
5791 """Modifies the parameters of a node.
5793 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5794 to the node role (as _ROLE_*)
5795 @cvar _R2F: a dictionary from node role to tuples of flags
5796 @cvar _FLAGS: a list of attribute names corresponding to the flags
5799 HPATH = "node-modify"
5800 HTYPE = constants.HTYPE_NODE
5802 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
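# A node is in exactly one of the four roles above; the role is just a
# convenience view over the (master_candidate, drained, offline) flag
# triple, converted via _F2R/_R2F below.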
5803 _F2R = {
5804 (True, False, False): _ROLE_CANDIDATE,
5805 (False, True, False): _ROLE_DRAINED,
5806 (False, False, True): _ROLE_OFFLINE,
5807 (False, False, False): _ROLE_REGULAR,
5808 }
5809 _R2F = dict((v, k) for k, v in _F2R.items())
5810 _FLAGS = ["master_candidate", "drained", "offline"]
5812 def CheckArguments(self):
5813 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5814 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5815 self.op.master_capable, self.op.vm_capable,
5816 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5817 self.op.disk_state]
5818 if all_mods.count(None) == len(all_mods):
5819 raise errors.OpPrereqError("Please pass at least one modification",
5821 if all_mods.count(True) > 1:
5822 raise errors.OpPrereqError("Can't set the node into more than one"
5823 " state at the same time",
5826 # Boolean value that tells us whether we might be demoting from MC
5827 self.might_demote = (self.op.master_candidate == False or
5828 self.op.offline == True or
5829 self.op.drained == True or
5830 self.op.master_capable == False)
5832 if self.op.secondary_ip:
5833 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5834 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5835 " address" % self.op.secondary_ip,
5838 self.lock_all = self.op.auto_promote and self.might_demote
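# Demoting this node from master candidate with auto_promote set may
# require promoting another node, so in that case we must hold all node
# locks (see the _AdjustCandidatePool call in Exec).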
5839 self.lock_instances = self.op.secondary_ip is not None
5841 def _InstanceFilter(self, instance):
5842 """Filter for getting affected instances.
5845 return (instance.disk_template in constants.DTS_INT_MIRROR and
5846 self.op.node_name in instance.all_nodes)
5848 def ExpandNames(self):
5849 if self.lock_all:
5850 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5851 else:
5852 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5854 # Since modifying a node can have severe effects on currently running
5855 # operations the resource lock is at least acquired in shared mode
5856 self.needed_locks[locking.LEVEL_NODE_RES] = \
5857 self.needed_locks[locking.LEVEL_NODE]
5859 # Get node resource and instance locks in shared mode; they are not used
5860 # for anything but read-only access
5861 self.share_locks[locking.LEVEL_NODE_RES] = 1
5862 self.share_locks[locking.LEVEL_INSTANCE] = 1
5864 if self.lock_instances:
5865 self.needed_locks[locking.LEVEL_INSTANCE] = \
5866 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5868 def BuildHooksEnv(self):
5869 """Build hooks env.
5871 This runs on the master node.
5873 """
5874 return {
5875 "OP_TARGET": self.op.node_name,
5876 "MASTER_CANDIDATE": str(self.op.master_candidate),
5877 "OFFLINE": str(self.op.offline),
5878 "DRAINED": str(self.op.drained),
5879 "MASTER_CAPABLE": str(self.op.master_capable),
5880 "VM_CAPABLE": str(self.op.vm_capable),
5883 def BuildHooksNodes(self):
5884 """Build hooks nodes.
5887 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5890 def CheckPrereq(self):
5891 """Check prerequisites.
5893 This only checks the instance list against the existing names.
5896 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5898 if self.lock_instances:
5899 affected_instances = \
5900 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5902 # Verify instance locks
5903 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5904 wanted_instances = frozenset(affected_instances.keys())
5905 if wanted_instances - owned_instances:
5906 raise errors.OpPrereqError("Instances affected by changing node %s's"
5907 " secondary IP address have changed since"
5908 " locks were acquired, wanted '%s', have"
5909 " '%s'; retry the operation" %
5911 utils.CommaJoin(wanted_instances),
5912 utils.CommaJoin(owned_instances)),
5915 affected_instances = None
5917 if (self.op.master_candidate is not None or
5918 self.op.drained is not None or
5919 self.op.offline is not None):
5920 # we can't change the master's node flags
5921 if self.op.node_name == self.cfg.GetMasterNode():
5922 raise errors.OpPrereqError("The master role can be changed"
5923 " only via master-failover",
5926 if self.op.master_candidate and not node.master_capable:
5927 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5928 " it a master candidate" % node.name,
5931 if self.op.vm_capable == False:
5932 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5934 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5935 " the vm_capable flag" % node.name,
5938 if node.master_candidate and self.might_demote and not self.lock_all:
5939 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5940 # check if after removing the current node, we're missing master
5941 # candidates
5942 (mc_remaining, mc_should, _) = \
5943 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5944 if mc_remaining < mc_should:
5945 raise errors.OpPrereqError("Not enough master candidates, please"
5946 " pass auto promote option to allow"
5947 " promotion (--auto-promote or RAPI"
5948 " auto_promote=True)", errors.ECODE_STATE)
5950 self.old_flags = old_flags = (node.master_candidate,
5951 node.drained, node.offline)
5952 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5953 self.old_role = old_role = self._F2R[old_flags]
5955 # Check for ineffective changes
5956 for attr in self._FLAGS:
5957 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5958 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5959 setattr(self.op, attr, None)
5961 # Past this point, any flag change to False means a transition
5962 # away from the respective state, as only real changes are kept
5964 # TODO: We might query the real power state if it supports OOB
5965 if _SupportsOob(self.cfg, node):
5966 if self.op.offline is False and not (node.powered or
5967 self.op.powered == True):
5968 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5969 " offline status can be reset") %
5971 elif self.op.powered is not None:
5972 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5973 " as it does not support out-of-band"
5974 " handling") % self.op.node_name)
5976 # If we're being deofflined/drained, we'll MC ourself if needed
5977 if (self.op.drained == False or self.op.offline == False or
5978 (self.op.master_capable and not node.master_capable)):
5979 if _DecideSelfPromotion(self):
5980 self.op.master_candidate = True
5981 self.LogInfo("Auto-promoting node to master candidate")
5983 # If we're no longer master capable, we'll demote ourselves from MC
5984 if self.op.master_capable == False and node.master_candidate:
5985 self.LogInfo("Demoting from master candidate")
5986 self.op.master_candidate = False
5989 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5990 if self.op.master_candidate:
5991 new_role = self._ROLE_CANDIDATE
5992 elif self.op.drained:
5993 new_role = self._ROLE_DRAINED
5994 elif self.op.offline:
5995 new_role = self._ROLE_OFFLINE
5996 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5997 # False is still in new flags, which means we're un-setting (the
5999 new_role = self._ROLE_REGULAR
6000 else: # no new flags, nothing, keep old role
6003 self.new_role = new_role
6005 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6006 # Trying to transition out of offline status
6007 result = self.rpc.call_version([node.name])[node.name]
6009 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6010 " to report its version: %s" %
6011 (node.name, result.fail_msg),
6014 self.LogWarning("Transitioning node from offline to online state"
6015 " without using re-add. Please make sure the node"
6018 if self.op.secondary_ip:
6019 # Ok even without locking, because this can't be changed by any LU
6020 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6021 master_singlehomed = master.secondary_ip == master.primary_ip
6022 if master_singlehomed and self.op.secondary_ip:
6023 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6024 " homed cluster", errors.ECODE_INVAL)
6026 assert not (frozenset(affected_instances) -
6027 self.owned_locks(locking.LEVEL_INSTANCE))
6029 if node.offline:
6030 if affected_instances:
6031 raise errors.OpPrereqError("Cannot change secondary IP address:"
6032 " offline node has instances (%s)"
6033 " configured to use it" %
6034 utils.CommaJoin(affected_instances.keys()))
6035 else:
6036 # On online nodes, check that no instances are running, and that
6037 # the node has the new ip and we can reach it.
6038 for instance in affected_instances.values():
6039 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6040 msg="cannot change secondary ip")
6042 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6043 if master.name != node.name:
6044 # check reachability from master secondary ip to new secondary ip
6045 if not netutils.TcpPing(self.op.secondary_ip,
6046 constants.DEFAULT_NODED_PORT,
6047 source=master.secondary_ip):
6048 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6049 " based ping to node daemon port",
6050 errors.ECODE_ENVIRON)
6052 if self.op.ndparams:
6053 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6054 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6055 self.new_ndparams = new_ndparams
6057 if self.op.hv_state:
6058 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6059 self.node.hv_state_static)
6061 if self.op.disk_state:
6062 self.new_disk_state = \
6063 _MergeAndVerifyDiskState(self.op.disk_state,
6064 self.node.disk_state_static)
6066 def Exec(self, feedback_fn):
6070 node = self.node
6071 old_role = self.old_role
6072 new_role = self.new_role
6074 result = []
6076 if self.op.ndparams:
6077 node.ndparams = self.new_ndparams
6079 if self.op.powered is not None:
6080 node.powered = self.op.powered
6082 if self.op.hv_state:
6083 node.hv_state_static = self.new_hv_state
6085 if self.op.disk_state:
6086 node.disk_state_static = self.new_disk_state
6088 for attr in ["master_capable", "vm_capable"]:
6089 val = getattr(self.op, attr)
6091 setattr(node, attr, val)
6092 result.append((attr, str(val)))
6094 if new_role != old_role:
6095 # Tell the node to demote itself, if no longer MC and not offline
6096 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6097 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6099 self.LogWarning("Node failed to demote itself: %s", msg)
6101 new_flags = self._R2F[new_role]
6102 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6103 if of != nf:
6104 result.append((desc, str(nf)))
6105 (node.master_candidate, node.drained, node.offline) = new_flags
6107 # we locked all nodes, we adjust the CP before updating this node
6108 if self.lock_all:
6109 _AdjustCandidatePool(self, [node.name])
6111 if self.op.secondary_ip:
6112 node.secondary_ip = self.op.secondary_ip
6113 result.append(("secondary_ip", self.op.secondary_ip))
6115 # this will trigger configuration file update, if needed
6116 self.cfg.Update(node, feedback_fn)
6118 # this will trigger job queue propagation or cleanup if the mc
6119 # flag changed
6120 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6121 self.context.ReaddNode(node)
6123 return result
6126 class LUNodePowercycle(NoHooksLU):
6127 """Powercycles a node.
6132 def CheckArguments(self):
6133 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6134 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6135 raise errors.OpPrereqError("The node is the master and the force"
6136 " parameter was not set",
6139 def ExpandNames(self):
6140 """Locking for PowercycleNode.
6142 This is a last-resort option and shouldn't block on other
6143 jobs. Therefore, we grab no locks.
6146 self.needed_locks = {}
6148 def Exec(self, feedback_fn):
6152 result = self.rpc.call_node_powercycle(self.op.node_name,
6153 self.cfg.GetHypervisorType())
6154 result.Raise("Failed to schedule the reboot")
6155 return result.payload
6158 class LUClusterQuery(NoHooksLU):
6159 """Query cluster configuration.
6164 def ExpandNames(self):
6165 self.needed_locks = {}
6167 def Exec(self, feedback_fn):
6168 """Return cluster config.
6171 cluster = self.cfg.GetClusterInfo()
6173 os_hvp = {}
6174 # Filter just for enabled hypervisors
6175 for os_name, hv_dict in cluster.os_hvp.items():
6176 os_hvp[os_name] = {}
6177 for hv_name, hv_params in hv_dict.items():
6178 if hv_name in cluster.enabled_hypervisors:
6179 os_hvp[os_name][hv_name] = hv_params
6181 # Convert ip_family to ip_version
6182 primary_ip_version = constants.IP4_VERSION
6183 if cluster.primary_ip_family == netutils.IP6Address.family:
6184 primary_ip_version = constants.IP6_VERSION
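# (the configuration stores an address family constant; clients expect a
# plain 4/6 version number, hence the conversion above)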
6187 "software_version": constants.RELEASE_VERSION,
6188 "protocol_version": constants.PROTOCOL_VERSION,
6189 "config_version": constants.CONFIG_VERSION,
6190 "os_api_version": max(constants.OS_API_VERSIONS),
6191 "export_version": constants.EXPORT_VERSION,
6192 "architecture": runtime.GetArchInfo(),
6193 "name": cluster.cluster_name,
6194 "master": cluster.master_node,
6195 "default_hypervisor": cluster.primary_hypervisor,
6196 "enabled_hypervisors": cluster.enabled_hypervisors,
6197 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6198 for hypervisor_name in cluster.enabled_hypervisors]),
6200 "beparams": cluster.beparams,
6201 "osparams": cluster.osparams,
6202 "ipolicy": cluster.ipolicy,
6203 "nicparams": cluster.nicparams,
6204 "ndparams": cluster.ndparams,
6205 "diskparams": cluster.diskparams,
6206 "candidate_pool_size": cluster.candidate_pool_size,
6207 "master_netdev": cluster.master_netdev,
6208 "master_netmask": cluster.master_netmask,
6209 "use_external_mip_script": cluster.use_external_mip_script,
6210 "volume_group_name": cluster.volume_group_name,
6211 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6212 "file_storage_dir": cluster.file_storage_dir,
6213 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6214 "maintain_node_health": cluster.maintain_node_health,
6215 "ctime": cluster.ctime,
6216 "mtime": cluster.mtime,
6217 "uuid": cluster.uuid,
6218 "tags": list(cluster.GetTags()),
6219 "uid_pool": cluster.uid_pool,
6220 "default_iallocator": cluster.default_iallocator,
6221 "reserved_lvs": cluster.reserved_lvs,
6222 "primary_ip_version": primary_ip_version,
6223 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6224 "hidden_os": cluster.hidden_os,
6225 "blacklisted_os": cluster.blacklisted_os,
6231 class LUClusterConfigQuery(NoHooksLU):
6232 """Return configuration values.
6237 def CheckArguments(self):
6238 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6240 def ExpandNames(self):
6241 self.cq.ExpandNames(self)
6243 def DeclareLocks(self, level):
6244 self.cq.DeclareLocks(self, level)
6246 def Exec(self, feedback_fn):
6247 result = self.cq.OldStyleQuery(self)
6249 assert len(result) == 1
6251 return result[0]
6254 class _ClusterQuery(_QueryBase):
6255 FIELDS = query.CLUSTER_FIELDS
6257 #: Do not sort (there is only one item)
6258 SORT_FIELD = None
6260 def ExpandNames(self, lu):
6261 lu.needed_locks = {}
6263 # The following variables interact with _QueryBase._GetNames
6264 self.wanted = locking.ALL_SET
6265 self.do_locking = self.use_locking
6268 raise errors.OpPrereqError("Can not use locking for cluster queries",
6271 def DeclareLocks(self, lu, level):
6272 pass
6274 def _GetQueryData(self, lu):
6275 """Computes the list of nodes and their attributes.
6278 # Locking is not used
6279 assert not (compat.any(lu.glm.is_owned(level)
6280 for level in locking.LEVELS
6281 if level != locking.LEVEL_CLUSTER) or
6282 self.do_locking or self.use_locking)
6284 if query.CQ_CONFIG in self.requested_data:
6285 cluster = lu.cfg.GetClusterInfo()
6286 else:
6287 cluster = NotImplemented
6289 if query.CQ_QUEUE_DRAINED in self.requested_data:
6290 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6291 else:
6292 drain_flag = NotImplemented
6294 if query.CQ_WATCHER_PAUSE in self.requested_data:
6295 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6296 else:
6297 watcher_pause = NotImplemented
6299 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6302 class LUInstanceActivateDisks(NoHooksLU):
6303 """Bring up an instance's disks.
6308 def ExpandNames(self):
6309 self._ExpandAndLockInstance()
6310 self.needed_locks[locking.LEVEL_NODE] = []
6311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6313 def DeclareLocks(self, level):
6314 if level == locking.LEVEL_NODE:
6315 self._LockInstancesNodes()
6317 def CheckPrereq(self):
6318 """Check prerequisites.
6320 This checks that the instance is in the cluster.
6323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6324 assert self.instance is not None, \
6325 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 _CheckNodeOnline(self, self.instance.primary_node)
6328 def Exec(self, feedback_fn):
6329 """Activate the disks.
6332 disks_ok, disks_info = \
6333 _AssembleInstanceDisks(self, self.instance,
6334 ignore_size=self.op.ignore_size)
6336 raise errors.OpExecError("Cannot activate block devices")
6341 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6342 ignore_size=False):
6343 """Prepare the block devices for an instance.
6345 This sets up the block devices on all nodes.
6347 @type lu: L{LogicalUnit}
6348 @param lu: the logical unit on whose behalf we execute
6349 @type instance: L{objects.Instance}
6350 @param instance: the instance for whose disks we assemble
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: which disks to assemble (or all, if None)
6353 @type ignore_secondaries: boolean
6354 @param ignore_secondaries: if true, errors on secondary nodes
6355 won't result in an error return from the function
6356 @type ignore_size: boolean
6357 @param ignore_size: if true, the current known size of the disk
6358 will not be used during the disk activation, useful for cases
6359 when the size is wrong
6360 @return: False if the operation failed, otherwise a list of
6361 (host, instance_visible_name, node_visible_name)
6362 with the mapping from node devices to instance devices
6364 """
6365 device_info = []
6366 disks_ok = True
6367 iname = instance.name
6368 disks = _ExpandCheckDisks(instance, disks)
6370 # With the two passes mechanism we try to reduce the window of
6371 # opportunity for the race condition of switching DRBD to primary
6372 # before handshaking occured, but we do not eliminate it
6374 # The proper fix would be to wait (with some limits) until the
6375 # connection has been made and drbd transitions from WFConnection
6376 # into any other network-connected state (Connected, SyncTarget,
6377 # SyncSource, etc.)
6379 # 1st pass, assemble on all nodes in secondary mode
6380 for idx, inst_disk in enumerate(disks):
6381 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6382 if ignore_size:
6383 node_disk = node_disk.Copy()
6384 node_disk.UnsetSize()
6385 lu.cfg.SetDiskID(node_disk, node)
6386 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6387 False, idx)
6388 msg = result.fail_msg
6389 if msg:
6390 is_offline_secondary = (node in instance.secondary_nodes and
6391 result.offline)
6392 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6393 " (is_primary=False, pass=1): %s",
6394 inst_disk.iv_name, node, msg)
6395 if not (ignore_secondaries or is_offline_secondary):
6396 disks_ok = False
6398 # FIXME: race condition on drbd migration to primary
6400 # 2nd pass, do only the primary node
6401 for idx, inst_disk in enumerate(disks):
6402 dev_path = None
6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 if node != instance.primary_node:
6406 continue
6407 if ignore_size:
6408 node_disk = node_disk.Copy()
6409 node_disk.UnsetSize()
6410 lu.cfg.SetDiskID(node_disk, node)
6411 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6412 True, idx)
6413 msg = result.fail_msg
6414 if msg:
6415 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416 " (is_primary=True, pass=2): %s",
6417 inst_disk.iv_name, node, msg)
6418 disks_ok = False
6419 else:
6420 dev_path = result.payload
6422 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424 # leave the disks configured for the primary node
6425 # this is a workaround that would be fixed better by
6426 # improving the logical/physical id handling
6427 for disk in disks:
6428 lu.cfg.SetDiskID(disk, instance.primary_node)
6430 return disks_ok, device_info
6433 def _StartInstanceDisks(lu, instance, force):
6434 """Start the disks of an instance.
6437 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6438 ignore_secondaries=force)
6439 if not disks_ok:
6440 _ShutdownInstanceDisks(lu, instance)
6441 if force is not None and not force:
6442 lu.proc.LogWarning("", hint="If the message above refers to a"
6444 " you can retry the operation using '--force'.")
6445 raise errors.OpExecError("Disk consistency error")
6448 class LUInstanceDeactivateDisks(NoHooksLU):
6449 """Shutdown an instance's disks.
6454 def ExpandNames(self):
6455 self._ExpandAndLockInstance()
6456 self.needed_locks[locking.LEVEL_NODE] = []
6457 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459 def DeclareLocks(self, level):
6460 if level == locking.LEVEL_NODE:
6461 self._LockInstancesNodes()
6463 def CheckPrereq(self):
6464 """Check prerequisites.
6466 This checks that the instance is in the cluster.
6469 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6470 assert self.instance is not None, \
6471 "Cannot retrieve locked instance %s" % self.op.instance_name
6473 def Exec(self, feedback_fn):
6474 """Deactivate the disks
6477 instance = self.instance
6479 _ShutdownInstanceDisks(self, instance)
6481 _SafeShutdownInstanceDisks(self, instance)
6484 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6485 """Shutdown block devices of an instance.
6487 This function checks if an instance is running, before calling
6488 _ShutdownInstanceDisks.
6491 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6492 _ShutdownInstanceDisks(lu, instance, disks=disks)
6495 def _ExpandCheckDisks(instance, disks):
6496 """Return the instance disks selected by the disks list
6498 @type disks: list of L{objects.Disk} or None
6499 @param disks: selected disks
6500 @rtype: list of L{objects.Disk}
6501 @return: selected instance disks to act on
6503 """
6504 if disks is None:
6505 return instance.disks
6506 else:
6507 if not set(disks).issubset(instance.disks):
6508 raise errors.ProgrammerError("Can only act on disks belonging to the"
6509 " target instance")
6510 return disks
6513 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6514 """Shutdown block devices of an instance.
6516 This does the shutdown on all nodes of the instance.
6518 If the ignore_primary is false, errors on the primary node are
6519 not ignored (the shutdown is reported as failed).
6521 """
6522 all_result = True
6523 disks = _ExpandCheckDisks(instance, disks)
6525 for disk in disks:
6526 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6527 lu.cfg.SetDiskID(top_disk, node)
6528 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6529 msg = result.fail_msg
6531 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6532 disk.iv_name, node, msg)
6533 if ((node == instance.primary_node and not ignore_primary) or
6534 (node != instance.primary_node and not result.offline)):
6535 all_result = False
6537 return all_result
6539 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6540 """Checks if a node has enough free memory.
6542 This function check if a given node has the needed amount of free
6543 memory. In case the node has less memory or we cannot get the
6544 information from the node, this function raise an OpPrereqError
6547 @type lu: C{LogicalUnit}
6548 @param lu: a logical unit from which we get configuration data
6550 @param node: the node to check
6551 @type reason: C{str}
6552 @param reason: string to use in the error message
6553 @type requested: C{int}
6554 @param requested: the amount of memory in MiB to check for
6555 @type hypervisor_name: C{str}
6556 @param hypervisor_name: the hypervisor to ask for memory stats
6558 @return: node current free memory
6559 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6560 we cannot check the node
6563 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6564 nodeinfo[node].Raise("Can't get data from node %s" % node,
6565 prereq=True, ecode=errors.ECODE_ENVIRON)
6566 (_, _, (hv_info, )) = nodeinfo[node].payload
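# the node_info payload is unpacked as a 3-tuple here; only the
# per-hypervisor data (third element) matters for the memory check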
6568 free_mem = hv_info.get("memory_free", None)
6569 if not isinstance(free_mem, int):
6570 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6571 " was '%s'" % (node, free_mem),
6572 errors.ECODE_ENVIRON)
6573 if requested > free_mem:
6574 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6575 " needed %s MiB, available %s MiB" %
6576 (node, reason, requested, free_mem),
6577 errors.ECODE_NORES)
6579 return free_mem
6581 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6582 """Checks if nodes have enough free disk space in the all VGs.
6584 This function check if all given nodes have the needed amount of
6585 free disk. In case any node has less disk or we cannot get the
6586 information from the node, this function raise an OpPrereqError
6589 @type lu: C{LogicalUnit}
6590 @param lu: a logical unit from which we get configuration data
6591 @type nodenames: C{list}
6592 @param nodenames: the list of node names to check
6593 @type req_sizes: C{dict}
6594 @param req_sizes: the hash of vg and corresponding amount of disk in
6595 MiB to check for
6596 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6597 or we cannot check the node
6600 for vg, req_size in req_sizes.items():
6601 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6604 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6605 """Checks if nodes have enough free disk space in the specified VG.
6607 This function check if all given nodes have the needed amount of
6608 free disk. In case any node has less disk or we cannot get the
6609 information from the node, this function raise an OpPrereqError
6612 @type lu: C{LogicalUnit}
6613 @param lu: a logical unit from which we get configuration data
6614 @type nodenames: C{list}
6615 @param nodenames: the list of node names to check
6617 @param vg: the volume group to check
6618 @type requested: C{int}
6619 @param requested: the amount of disk in MiB to check for
6620 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6621 or we cannot check the node
6624 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6625 for node in nodenames:
6626 info = nodeinfo[node]
6627 info.Raise("Cannot get current information from node %s" % node,
6628 prereq=True, ecode=errors.ECODE_ENVIRON)
6629 (_, (vg_info, ), _) = info.payload
6630 vg_free = vg_info.get("vg_free", None)
6631 if not isinstance(vg_free, int):
6632 raise errors.OpPrereqError("Can't compute free disk space on node"
6633 " %s for vg %s, result was '%s'" %
6634 (node, vg, vg_free), errors.ECODE_ENVIRON)
6635 if requested > vg_free:
6636 raise errors.OpPrereqError("Not enough disk space on target node %s"
6637 " vg %s: required %d MiB, available %d MiB" %
6638 (node, vg, requested, vg_free),
6639 errors.ECODE_NORES)
6642 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6643 """Checks if nodes have enough physical CPUs
6645 This function checks if all given nodes have the needed number of
6646 physical CPUs. In case any node has less CPUs or we cannot get the
6647 information from the node, this function raises an OpPrereqError
6650 @type lu: C{LogicalUnit}
6651 @param lu: a logical unit from which we get configuration data
6652 @type nodenames: C{list}
6653 @param nodenames: the list of node names to check
6654 @type requested: C{int}
6655 @param requested: the minimum acceptable number of physical CPUs
6656 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6657 or we cannot check the node
6660 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6661 for node in nodenames:
6662 info = nodeinfo[node]
6663 info.Raise("Cannot get current information from node %s" % node,
6664 prereq=True, ecode=errors.ECODE_ENVIRON)
6665 (_, _, (hv_info, )) = info.payload
6666 num_cpus = hv_info.get("cpu_total", None)
6667 if not isinstance(num_cpus, int):
6668 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6669 " on node %s, result was '%s'" %
6670 (node, num_cpus), errors.ECODE_ENVIRON)
6671 if requested > num_cpus:
6672 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6673 "required" % (node, num_cpus, requested),
6677 class LUInstanceStartup(LogicalUnit):
6678 """Starts an instance.
6681 HPATH = "instance-start"
6682 HTYPE = constants.HTYPE_INSTANCE
6685 def CheckArguments(self):
6687 if self.op.beparams:
6688 # fill the beparams dict
6689 objects.UpgradeBeParams(self.op.beparams)
6690 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6694 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696 def DeclareLocks(self, level):
6697 if level == locking.LEVEL_NODE_RES:
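# starting an instance only touches its primary node, so only that
# node's resource lock is needed here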
6698 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700 def BuildHooksEnv(self):
6701 """Build hooks env.
6703 This runs on master, primary and secondary nodes of the instance.
6705 """
6706 env = {
6707 "FORCE": self.op.force,
6708 }
6710 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6712 return env
6714 def BuildHooksNodes(self):
6715 """Build hooks nodes.
6718 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6721 def CheckPrereq(self):
6722 """Check prerequisites.
6724 This checks that the instance is in the cluster.
6727 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6728 assert self.instance is not None, \
6729 "Cannot retrieve locked instance %s" % self.op.instance_name
6732 if self.op.hvparams:
6733 # check hypervisor parameter syntax (locally)
6734 cluster = self.cfg.GetClusterInfo()
6735 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6736 filled_hvp = cluster.FillHV(instance)
6737 filled_hvp.update(self.op.hvparams)
6738 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6739 hv_type.CheckParameterSyntax(filled_hvp)
6740 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746 if self.primary_offline and self.op.ignore_offline_nodes:
6747 self.proc.LogWarning("Ignoring offline primary node")
6749 if self.op.hvparams or self.op.beparams:
6750 self.proc.LogWarning("Overridden parameters are ignored")
6751 else:
6752 _CheckNodeOnline(self, instance.primary_node)
6754 bep = self.cfg.GetClusterInfo().FillBE(instance)
6755 bep.update(self.op.beparams)
6757 # check bridges existence
6758 _CheckInstanceBridgesExist(self, instance)
6760 remote_info = self.rpc.call_instance_info(instance.primary_node,
6761 instance.name,
6762 instance.hypervisor)
6763 remote_info.Raise("Error checking node %s" % instance.primary_node,
6764 prereq=True, ecode=errors.ECODE_ENVIRON)
6765 if not remote_info.payload: # not running already
6766 _CheckNodeFreeMemory(self, instance.primary_node,
6767 "starting instance %s" % instance.name,
6768 bep[constants.BE_MINMEM], instance.hypervisor)
6770 def Exec(self, feedback_fn):
6771 """Start the instance.
6774 instance = self.instance
6775 force = self.op.force
6777 if not self.op.no_remember:
6778 self.cfg.MarkInstanceUp(instance.name)
6780 if self.primary_offline:
6781 assert self.op.ignore_offline_nodes
6782 self.proc.LogInfo("Primary node offline, marked instance as started")
6783 else:
6784 node_current = instance.primary_node
6786 _StartInstanceDisks(self, instance, force)
6788 result = \
6789 self.rpc.call_instance_start(node_current,
6790 (instance, self.op.hvparams,
6791 self.op.beparams), force,
6792 self.op.startup_paused)
6793 msg = result.fail_msg
6794 if msg:
6795 _ShutdownInstanceDisks(self, instance)
6796 raise errors.OpExecError("Could not start instance: %s" % msg)
6799 class LUInstanceReboot(LogicalUnit):
6800 """Reboot an instance.
6803 HPATH = "instance-reboot"
6804 HTYPE = constants.HTYPE_INSTANCE
6807 def ExpandNames(self):
6808 self._ExpandAndLockInstance()
6810 def BuildHooksEnv(self):
6811 """Build hooks env.
6813 This runs on master, primary and secondary nodes of the instance.
6815 """
6816 env = {
6817 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6818 "REBOOT_TYPE": self.op.reboot_type,
6819 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6820 }
6822 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824 return env
6826 def BuildHooksNodes(self):
6827 """Build hooks nodes.
6830 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6831 return (nl, nl)
6833 def CheckPrereq(self):
6834 """Check prerequisites.
6836 This checks that the instance is in the cluster.
6839 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840 assert self.instance is not None, \
6841 "Cannot retrieve locked instance %s" % self.op.instance_name
6842 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6843 _CheckNodeOnline(self, instance.primary_node)
6845 # check bridges existence
6846 _CheckInstanceBridgesExist(self, instance)
6848 def Exec(self, feedback_fn):
6849 """Reboot the instance.
6852 instance = self.instance
6853 ignore_secondaries = self.op.ignore_secondaries
6854 reboot_type = self.op.reboot_type
6856 remote_info = self.rpc.call_instance_info(instance.primary_node,
6857 instance.name,
6858 instance.hypervisor)
6859 remote_info.Raise("Error checking node %s" % instance.primary_node)
6860 instance_running = bool(remote_info.payload)
6862 node_current = instance.primary_node
6864 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6865 constants.INSTANCE_REBOOT_HARD]:
6866 for disk in instance.disks:
6867 self.cfg.SetDiskID(disk, node_current)
6868 result = self.rpc.call_instance_reboot(node_current, instance,
6869 reboot_type,
6870 self.op.shutdown_timeout)
6871 result.Raise("Could not reboot instance")
6872 else:
6873 if instance_running:
6874 result = self.rpc.call_instance_shutdown(node_current, instance,
6875 self.op.shutdown_timeout)
6876 result.Raise("Could not shutdown instance for full reboot")
6877 _ShutdownInstanceDisks(self, instance)
6879 self.LogInfo("Instance %s was already stopped, starting now",
6881 _StartInstanceDisks(self, instance, ignore_secondaries)
6882 result = self.rpc.call_instance_start(node_current,
6883 (instance, None, None), False)
6884 msg = result.fail_msg
6885 if msg:
6886 _ShutdownInstanceDisks(self, instance)
6887 raise errors.OpExecError("Could not start instance for"
6888 " full reboot: %s" % msg)
6890 self.cfg.MarkInstanceUp(instance.name)
6893 class LUInstanceShutdown(LogicalUnit):
6894 """Shutdown an instance.
6897 HPATH = "instance-stop"
6898 HTYPE = constants.HTYPE_INSTANCE
6901 def ExpandNames(self):
6902 self._ExpandAndLockInstance()
6904 def BuildHooksEnv(self):
6907 This runs on master, primary and secondary nodes of the instance.
6910 env = _BuildInstanceHookEnvByObject(self, self.instance)
6911 env["TIMEOUT"] = self.op.timeout
6914 def BuildHooksNodes(self):
6915 """Build hooks nodes.
6918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6919 return (nl, nl)
6921 def CheckPrereq(self):
6922 """Check prerequisites.
6924 This checks that the instance is in the cluster.
6927 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928 assert self.instance is not None, \
6929 "Cannot retrieve locked instance %s" % self.op.instance_name
6931 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933 self.primary_offline = \
6934 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936 if self.primary_offline and self.op.ignore_offline_nodes:
6937 self.proc.LogWarning("Ignoring offline primary node")
6939 _CheckNodeOnline(self, self.instance.primary_node)
6941 def Exec(self, feedback_fn):
6942 """Shutdown the instance.
6945 instance = self.instance
6946 node_current = instance.primary_node
6947 timeout = self.op.timeout
6949 if not self.op.no_remember:
6950 self.cfg.MarkInstanceDown(instance.name)
6952 if self.primary_offline:
6953 assert self.op.ignore_offline_nodes
6954 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6955 else:
6956 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6957 msg = result.fail_msg
6959 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961 _ShutdownInstanceDisks(self, instance)
6964 class LUInstanceReinstall(LogicalUnit):
6965 """Reinstall an instance.
6968 HPATH = "instance-reinstall"
6969 HTYPE = constants.HTYPE_INSTANCE
6972 def ExpandNames(self):
6973 self._ExpandAndLockInstance()
6975 def BuildHooksEnv(self):
6978 This runs on master, primary and secondary nodes of the instance.
6981 return _BuildInstanceHookEnvByObject(self, self.instance)
6983 def BuildHooksNodes(self):
6984 """Build hooks nodes.
6987 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6988 return (nl, nl)
6990 def CheckPrereq(self):
6991 """Check prerequisites.
6993 This checks that the instance is in the cluster and is not running.
6996 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997 assert instance is not None, \
6998 "Cannot retrieve locked instance %s" % self.op.instance_name
6999 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7000 " offline, cannot reinstall")
7002 if instance.disk_template == constants.DT_DISKLESS:
7003 raise errors.OpPrereqError("Instance '%s' has no disks" %
7004 self.op.instance_name,
7005 errors.ECODE_INVAL)
7006 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008 if self.op.os_type is not None:
7010 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7011 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7012 instance_os = self.op.os_type
7013 else:
7014 instance_os = instance.os
7016 nodelist = list(instance.all_nodes)
7018 if self.op.osparams:
7019 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7020 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7021 self.os_inst = i_osdict # the new dict (without defaults)
7022 else:
7023 self.os_inst = {}
7025 self.instance = instance
7027 def Exec(self, feedback_fn):
7028 """Reinstall the instance.
7031 inst = self.instance
7033 if self.op.os_type is not None:
7034 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7035 inst.os = self.op.os_type
7036 # Write to configuration
7037 self.cfg.Update(inst, feedback_fn)
7039 _StartInstanceDisks(self, inst, None)
7041 feedback_fn("Running the instance OS create scripts...")
7042 # FIXME: pass debug option from opcode to backend
7043 result = self.rpc.call_instance_os_add(inst.primary_node,
7044 (inst, self.os_inst), True,
7045 self.op.debug_level)
7046 result.Raise("Could not install OS for instance %s on node %s" %
7047 (inst.name, inst.primary_node))
7048 finally:
7049 _ShutdownInstanceDisks(self, inst)
7052 class LUInstanceRecreateDisks(LogicalUnit):
7053 """Recreate an instance's missing disks.
7056 HPATH = "instance-recreate-disks"
7057 HTYPE = constants.HTYPE_INSTANCE
7060 _MODIFYABLE = frozenset([
7061 constants.IDISK_SIZE,
7062 constants.IDISK_MODE,
7063 ])
7065 # New or changed disk parameters may have different semantics
7066 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7067 constants.IDISK_ADOPT,
7069 # TODO: Implement support changing VG while recreating
7070 constants.IDISK_VG,
7071 constants.IDISK_METAVG,
7072 ]))
7074 def CheckArguments(self):
7075 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7076 # Normalize and convert deprecated list of disk indices
7077 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
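# e.g. the deprecated form [2, 0] becomes [(0, {}), (2, {})]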
7079 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7081 raise errors.OpPrereqError("Some disks have been specified more than"
7082 " once: %s" % utils.CommaJoin(duplicates),
7085 for (idx, params) in self.op.disks:
7086 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7087 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7089 raise errors.OpPrereqError("Parameters for disk %s try to change"
7090 " unmodifyable parameter(s): %s" %
7091 (idx, utils.CommaJoin(unsupported)),
7094 def ExpandNames(self):
7095 self._ExpandAndLockInstance()
7096 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7097 if self.op.nodes:
7098 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7099 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7100 else:
7101 self.needed_locks[locking.LEVEL_NODE] = []
7102 self.needed_locks[locking.LEVEL_NODE_RES] = []
7104 def DeclareLocks(self, level):
7105 if level == locking.LEVEL_NODE:
7106 # if we replace the nodes, we only need to lock the old primary,
7107 # otherwise we need to lock all nodes for disk re-creation
7108 primary_only = bool(self.op.nodes)
7109 self._LockInstancesNodes(primary_only=primary_only)
7110 elif level == locking.LEVEL_NODE_RES:
7112 self.needed_locks[locking.LEVEL_NODE_RES] = \
7113 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7115 def BuildHooksEnv(self):
7118 This runs on master, primary and secondary nodes of the instance.
7121 return _BuildInstanceHookEnvByObject(self, self.instance)
7123 def BuildHooksNodes(self):
7124 """Build hooks nodes.
7127 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7128 return (nl, nl)
7130 def CheckPrereq(self):
7131 """Check prerequisites.
7133 This checks that the instance is in the cluster and is not running.
7136 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7137 assert instance is not None, \
7138 "Cannot retrieve locked instance %s" % self.op.instance_name
7139 if self.op.nodes:
7140 if len(self.op.nodes) != len(instance.all_nodes):
7141 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7142 " %d replacement nodes were specified" %
7143 (instance.name, len(instance.all_nodes),
7144 len(self.op.nodes)),
7145 errors.ECODE_INVAL)
7146 assert instance.disk_template != constants.DT_DRBD8 or \
7147 len(self.op.nodes) == 2
7148 assert instance.disk_template != constants.DT_PLAIN or \
7149 len(self.op.nodes) == 1
7150 primary_node = self.op.nodes[0]
7151 else:
7152 primary_node = instance.primary_node
7153 _CheckNodeOnline(self, primary_node)
7155 if instance.disk_template == constants.DT_DISKLESS:
7156 raise errors.OpPrereqError("Instance '%s' has no disks" %
7157 self.op.instance_name, errors.ECODE_INVAL)
7159 # if we replace nodes *and* the old primary is offline, we don't
7161 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7162 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7163 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7164 if not (self.op.nodes and old_pnode.offline):
7165 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7166 msg="cannot recreate disks")
7168 if self.op.disks:
7169 self.disks = dict(self.op.disks)
7170 else:
7171 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
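# an empty override dict for a disk index means "recreate this disk with
# its current size/mode" (see the disk.Update call in Exec)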
7173 maxidx = max(self.disks.keys())
7174 if maxidx >= len(instance.disks):
7175 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7178 if (self.op.nodes and
7179 sorted(self.disks.keys()) != range(len(instance.disks))):
7180 raise errors.OpPrereqError("Can't recreate disks partially and"
7181 " change the nodes at the same time",
7184 self.instance = instance
7186 def Exec(self, feedback_fn):
7187 """Recreate the disks.
7190 instance = self.instance
7192 assert (self.owned_locks(locking.LEVEL_NODE) ==
7193 self.owned_locks(locking.LEVEL_NODE_RES))
7195 to_skip = []
7196 mods = [] # keeps track of needed changes
7198 for idx, disk in enumerate(instance.disks):
7199 try:
7200 changes = self.disks[idx]
7201 except KeyError:
7202 # Disk should not be recreated
7203 to_skip.append(idx)
7204 continue
7206 # update secondaries for disks, if needed
7207 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7208 # need to update the nodes and minors
7209 assert len(self.op.nodes) == 2
7210 assert len(disk.logical_id) == 6 # otherwise disk internals
7212 (_, _, old_port, _, _, old_secret) = disk.logical_id
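# DRBD8 logical_id layout (per the 6-element assert above):
# (node_a, node_b, port, minor_a, minor_b, shared_secret)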
7213 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7214 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7215 new_minors[0], new_minors[1], old_secret)
7216 assert len(disk.logical_id) == len(new_id)
7217 else:
7218 new_id = None
7220 mods.append((idx, new_id, changes))
7222 # now that we have passed all asserts above, we can apply the mods
7223 # in a single run (to avoid partial changes)
7224 for idx, new_id, changes in mods:
7225 disk = instance.disks[idx]
7226 if new_id is not None:
7227 assert disk.dev_type == constants.LD_DRBD8
7228 disk.logical_id = new_id
7229 if changes:
7230 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7231 mode=changes.get(constants.IDISK_MODE, None))
7233 # change primary node, if needed
7234 if self.op.nodes:
7235 instance.primary_node = self.op.nodes[0]
7236 self.LogWarning("Changing the instance's nodes, you will have to"
7237 " remove any disks left on the older nodes manually")
7240 self.cfg.Update(instance, feedback_fn)
7242 _CreateDisks(self, instance, to_skip=to_skip)
7245 class LUInstanceRename(LogicalUnit):
7246 """Rename an instance.
7249 HPATH = "instance-rename"
7250 HTYPE = constants.HTYPE_INSTANCE
7252 def CheckArguments(self):
7256 if self.op.ip_check and not self.op.name_check:
7257 # TODO: make the ip check more flexible and not depend on the name check
7258 raise errors.OpPrereqError("IP address check requires a name check",
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7267 env = _BuildInstanceHookEnvByObject(self, self.instance)
7268 env["INSTANCE_NEW_NAME"] = self.op.new_name
7271 def BuildHooksNodes(self):
7272 """Build hooks nodes.
7275 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7276 return (nl, nl)
7278 def CheckPrereq(self):
7279 """Check prerequisites.
7281 This checks that the instance is in the cluster and is not running.
7284 self.op.instance_name = _ExpandInstanceName(self.cfg,
7285 self.op.instance_name)
7286 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7287 assert instance is not None
7288 _CheckNodeOnline(self, instance.primary_node)
7289 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7290 msg="cannot rename")
7291 self.instance = instance
7293 new_name = self.op.new_name
7294 if self.op.name_check:
7295 hostname = netutils.GetHostname(name=new_name)
7296 if hostname.name != new_name:
7297 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7299 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7300 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7301 " same as given hostname '%s'") %
7302 (hostname.name, self.op.new_name),
7303 errors.ECODE_INVAL)
7304 new_name = self.op.new_name = hostname.name
7305 if (self.op.ip_check and
7306 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7307 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7308 (hostname.ip, new_name),
7309 errors.ECODE_NOTUNIQUE)
7311 instance_list = self.cfg.GetInstanceList()
7312 if new_name in instance_list and new_name != instance.name:
7313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7314 new_name, errors.ECODE_EXISTS)
7316 def Exec(self, feedback_fn):
7317 """Rename the instance.
7320 inst = self.instance
7321 old_name = inst.name
7323 rename_file_storage = False
7324 if (inst.disk_template in constants.DTS_FILEBASED and
7325 self.op.new_name != inst.name):
7326 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7327 rename_file_storage = True
7329 self.cfg.RenameInstance(inst.name, self.op.new_name)
7330 # Change the instance lock. This is definitely safe while we hold the BGL.
7331 # Otherwise the new lock would have to be added in acquired mode.
7333 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7334 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7336 # re-read the instance from the configuration after rename
7337 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7339 if rename_file_storage:
7340 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7342 old_file_storage_dir,
7343 new_file_storage_dir)
7344 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7345 " (but the instance has been renamed in Ganeti)" %
7346 (inst.primary_node, old_file_storage_dir,
7347 new_file_storage_dir))
7349 _StartInstanceDisks(self, inst, None)
7350 try:
7351 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7352 old_name, self.op.debug_level)
7353 msg = result.fail_msg
7355 msg = ("Could not run OS rename script for instance %s on node %s"
7356 " (but the instance has been renamed in Ganeti): %s" %
7357 (inst.name, inst.primary_node, msg))
7358 self.proc.LogWarning(msg)
7359 finally:
7360 _ShutdownInstanceDisks(self, inst)
7362 return inst.name
7365 class LUInstanceRemove(LogicalUnit):
7366 """Remove an instance.
7369 HPATH = "instance-remove"
7370 HTYPE = constants.HTYPE_INSTANCE
7373 def ExpandNames(self):
7374 self._ExpandAndLockInstance()
7375 self.needed_locks[locking.LEVEL_NODE] = []
7376 self.needed_locks[locking.LEVEL_NODE_RES] = []
7377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 self._LockInstancesNodes()
7382 elif level == locking.LEVEL_NODE_RES:
7384 self.needed_locks[locking.LEVEL_NODE_RES] = \
7385 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7387 def BuildHooksEnv(self):
7390 This runs on master, primary and secondary nodes of the instance.
7393 env = _BuildInstanceHookEnvByObject(self, self.instance)
7394 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7397 def BuildHooksNodes(self):
7398 """Build hooks nodes.
7401 nl = [self.cfg.GetMasterNode()]
7402 nl_post = list(self.instance.all_nodes) + nl
7403 return (nl, nl_post)
7405 def CheckPrereq(self):
7406 """Check prerequisites.
7408 This checks that the instance is in the cluster.
7411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7412 assert self.instance is not None, \
7413 "Cannot retrieve locked instance %s" % self.op.instance_name
7415 def Exec(self, feedback_fn):
7416 """Remove the instance.
7419 instance = self.instance
7420 logging.info("Shutting down instance %s on node %s",
7421 instance.name, instance.primary_node)
7423 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7424 self.op.shutdown_timeout)
7425 msg = result.fail_msg
7426 if msg:
7427 if self.op.ignore_failures:
7428 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7429 else:
7430 raise errors.OpExecError("Could not shutdown instance %s on"
7431 " node %s: %s" %
7432 (instance.name, instance.primary_node, msg))
7434 assert (self.owned_locks(locking.LEVEL_NODE) ==
7435 self.owned_locks(locking.LEVEL_NODE_RES))
7436 assert not (set(instance.all_nodes) -
7437 self.owned_locks(locking.LEVEL_NODE)), \
7438 "Not owning correct locks"
7440 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7443 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7444 """Utility function to remove an instance.
7447 logging.info("Removing block devices for instance %s", instance.name)
7449 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7450 if not ignore_failures:
7451 raise errors.OpExecError("Can't remove instance's disks")
7452 feedback_fn("Warning: can't remove instance's disks")
7454 logging.info("Removing instance %s out of cluster config", instance.name)
7456 lu.cfg.RemoveInstance(instance.name)
7458 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7459 "Instance lock removal conflict"
7461 # Remove lock for the instance
7462 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7465 class LUInstanceQuery(NoHooksLU):
7466 """Logical unit for querying instances.
7469 # pylint: disable=W0142
7472 def CheckArguments(self):
7473 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7474 self.op.output_fields, self.op.use_locking)
7476 def ExpandNames(self):
7477 self.iq.ExpandNames(self)
7479 def DeclareLocks(self, level):
7480 self.iq.DeclareLocks(self, level)
7482 def Exec(self, feedback_fn):
7483 return self.iq.OldStyleQuery(self)
7486 class LUInstanceFailover(LogicalUnit):
7487 """Failover an instance.
7490 HPATH = "instance-failover"
7491 HTYPE = constants.HTYPE_INSTANCE
7494 def CheckArguments(self):
7495 """Check the arguments.
7498 self.iallocator = getattr(self.op, "iallocator", None)
7499 self.target_node = getattr(self.op, "target_node", None)
7501 def ExpandNames(self):
7502 self._ExpandAndLockInstance()
7504 if self.op.target_node is not None:
7505 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7507 self.needed_locks[locking.LEVEL_NODE] = []
7508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7510 self.needed_locks[locking.LEVEL_NODE_RES] = []
7511 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7513 ignore_consistency = self.op.ignore_consistency
7514 shutdown_timeout = self.op.shutdown_timeout
7515 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7516 cleanup=False,
7517 failover=True,
7518 ignore_consistency=ignore_consistency,
7519 shutdown_timeout=shutdown_timeout,
7520 ignore_ipolicy=self.op.ignore_ipolicy)
7521 self.tasklets = [self._migrater]
7523 def DeclareLocks(self, level):
7524 if level == locking.LEVEL_NODE:
7525 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7526 if instance.disk_template in constants.DTS_EXT_MIRROR:
7527 if self.op.target_node is None:
7528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7530 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7531 self.op.target_node]
7532 del self.recalculate_locks[locking.LEVEL_NODE]
7534 self._LockInstancesNodes()
7535 elif level == locking.LEVEL_NODE_RES:
7537 self.needed_locks[locking.LEVEL_NODE_RES] = \
7538 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7540 def BuildHooksEnv(self):
7543 This runs on master, primary and secondary nodes of the instance.
7546 instance = self._migrater.instance
7547 source_node = instance.primary_node
7548 target_node = self.op.target_node
7550 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7551 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7552 "OLD_PRIMARY": source_node,
7553 "NEW_PRIMARY": target_node,
7556 if instance.disk_template in constants.DTS_INT_MIRROR:
7557 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7558 env["NEW_SECONDARY"] = source_node
7560 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7562 env.update(_BuildInstanceHookEnvByObject(self, instance))
7566 def BuildHooksNodes(self):
7567 """Build hooks nodes.
7570 instance = self._migrater.instance
7571 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7572 return (nl, nl + [instance.primary_node])
7575 class LUInstanceMigrate(LogicalUnit):
7576 """Migrate an instance.
7578 This is migration without shutting down, compared to the failover,
7579 which is done with shutdown.
7582 HPATH = "instance-migrate"
7583 HTYPE = constants.HTYPE_INSTANCE
7586 def ExpandNames(self):
7587 self._ExpandAndLockInstance()
7589 if self.op.target_node is not None:
7590 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7592 self.needed_locks[locking.LEVEL_NODE] = []
7593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7595 self.needed_locks[locking.LEVEL_NODE] = []
7596 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7599 TLMigrateInstance(self, self.op.instance_name,
7600 cleanup=self.op.cleanup,
7602 fallback=self.op.allow_failover,
7603 allow_runtime_changes=self.op.allow_runtime_changes,
7604 ignore_ipolicy=self.op.ignore_ipolicy)
7605 self.tasklets = [self._migrater]
7607 def DeclareLocks(self, level):
7608 if level == locking.LEVEL_NODE:
7609 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7610 if instance.disk_template in constants.DTS_EXT_MIRROR:
7611 if self.op.target_node is None:
7612 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7614 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7615 self.op.target_node]
7616 del self.recalculate_locks[locking.LEVEL_NODE]
7618 self._LockInstancesNodes()
7619 elif level == locking.LEVEL_NODE_RES:
7621 self.needed_locks[locking.LEVEL_NODE_RES] = \
7622 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7624 def BuildHooksEnv(self):
7627 This runs on master, primary and secondary nodes of the instance.
7630 instance = self._migrater.instance
7631 source_node = instance.primary_node
7632 target_node = self.op.target_node
7633 env = _BuildInstanceHookEnvByObject(self, instance)
7635 "MIGRATE_LIVE": self._migrater.live,
7636 "MIGRATE_CLEANUP": self.op.cleanup,
7637 "OLD_PRIMARY": source_node,
7638 "NEW_PRIMARY": target_node,
7639 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7642 if instance.disk_template in constants.DTS_INT_MIRROR:
7643 env["OLD_SECONDARY"] = target_node
7644 env["NEW_SECONDARY"] = source_node
7646 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7650 def BuildHooksNodes(self):
7651 """Build hooks nodes.
7654 instance = self._migrater.instance
7655 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7656 return (nl, nl + [instance.primary_node])
7659 class LUInstanceMove(LogicalUnit):
7660 """Move an instance by data-copying.
7663 HPATH = "instance-move"
7664 HTYPE = constants.HTYPE_INSTANCE
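# Usage note (added for illustration): this LU backs the "gnt-instance move"
# command (typically invoked as "gnt-instance move -n <target_node> <instance>").
# The data is copied disk by disk in Exec() via blockdev_export, which is why
# CheckPrereq only accepts plain LV and file based disk layouts.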
7667 def ExpandNames(self):
7668 self._ExpandAndLockInstance()
7669 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7670 self.op.target_node = target_node
7671 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7672 self.needed_locks[locking.LEVEL_NODE_RES] = []
7673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7675 def DeclareLocks(self, level):
7676 if level == locking.LEVEL_NODE:
7677 self._LockInstancesNodes(primary_only=True)
7678 elif level == locking.LEVEL_NODE_RES:
7680 self.needed_locks[locking.LEVEL_NODE_RES] = \
7681 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7683 def BuildHooksEnv(self):
7686 This runs on master, primary and secondary nodes of the instance.
7690 "TARGET_NODE": self.op.target_node,
7691 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7693 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7696 def BuildHooksNodes(self):
7697 """Build hooks nodes.
7701 self.cfg.GetMasterNode(),
7702 self.instance.primary_node,
7703 self.op.target_node,
7707 def CheckPrereq(self):
7708 """Check prerequisites.
7710 This checks that the instance is in the cluster.
7713 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7714 assert self.instance is not None, \
7715 "Cannot retrieve locked instance %s" % self.op.instance_name
7717 node = self.cfg.GetNodeInfo(self.op.target_node)
7718 assert node is not None, \
7719 "Cannot retrieve locked node %s" % self.op.target_node
7721 self.target_node = target_node = node.name
7723 if target_node == instance.primary_node:
7724 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7725 (instance.name, target_node),
7728 bep = self.cfg.GetClusterInfo().FillBE(instance)
7730 for idx, dsk in enumerate(instance.disks):
7731 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7732 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7733 " cannot copy" % idx, errors.ECODE_STATE)
7735 _CheckNodeOnline(self, target_node)
7736 _CheckNodeNotDrained(self, target_node)
7737 _CheckNodeVmCapable(self, target_node)
7738 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7739 self.cfg.GetNodeGroup(node.group))
7740 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7741 ignore=self.op.ignore_ipolicy)
7743 if instance.admin_state == constants.ADMINST_UP:
7744 # check memory requirements on the secondary node
7745 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7746 instance.name, bep[constants.BE_MAXMEM],
7747 instance.hypervisor)
7749 self.LogInfo("Not checking memory on the secondary node as"
7750 " instance will not be started")
7752 # check bridge existence
7753 _CheckInstanceBridgesExist(self, instance, node=target_node)
7755 def Exec(self, feedback_fn):
7756 """Move an instance.
7758 The move is done by shutting it down on its present node, copying
7759 the data over (slow) and starting it on the new node.
7762 instance = self.instance
7764 source_node = instance.primary_node
7765 target_node = self.target_node
7767 self.LogInfo("Shutting down instance %s on source node %s",
7768 instance.name, source_node)
7770 assert (self.owned_locks(locking.LEVEL_NODE) ==
7771 self.owned_locks(locking.LEVEL_NODE_RES))
7773 result = self.rpc.call_instance_shutdown(source_node, instance,
7774 self.op.shutdown_timeout)
7775 msg = result.fail_msg
7777 if self.op.ignore_consistency:
7778 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7779 " Proceeding anyway. Please make sure node"
7780 " %s is down. Error details: %s",
7781 instance.name, source_node, source_node, msg)
7783 raise errors.OpExecError("Could not shutdown instance %s on"
7785 (instance.name, source_node, msg))
7787 # create the target disks
7789 _CreateDisks(self, instance, target_node=target_node)
7790 except errors.OpExecError:
7791 self.LogWarning("Device creation failed, reverting...")
7793 _RemoveDisks(self, instance, target_node=target_node)
7795 self.cfg.ReleaseDRBDMinors(instance.name)
7798 cluster_name = self.cfg.GetClusterInfo().cluster_name
7801 # activate, get path, copy the data over
7802 for idx, disk in enumerate(instance.disks):
7803 self.LogInfo("Copying data for disk %d", idx)
7804 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7805 instance.name, True, idx)
7807 self.LogWarning("Can't assemble newly created disk %d: %s",
7808 idx, result.fail_msg)
7809 errs.append(result.fail_msg)
7811 dev_path = result.payload
7812 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7813 target_node, dev_path,
7816 self.LogWarning("Can't copy data over for disk %d: %s",
7817 idx, result.fail_msg)
7818 errs.append(result.fail_msg)
7822 self.LogWarning("Some disks failed to copy, aborting")
7824 _RemoveDisks(self, instance, target_node=target_node)
7826 self.cfg.ReleaseDRBDMinors(instance.name)
7827 raise errors.OpExecError("Errors during disk copy: %s" %
7830 instance.primary_node = target_node
7831 self.cfg.Update(instance, feedback_fn)
7833 self.LogInfo("Removing the disks on the original node")
7834 _RemoveDisks(self, instance, target_node=source_node)
7836 # Only start the instance if it's marked as up
7837 if instance.admin_state == constants.ADMINST_UP:
7838 self.LogInfo("Starting instance %s on node %s",
7839 instance.name, target_node)
7841 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7842 ignore_secondaries=True)
7844 _ShutdownInstanceDisks(self, instance)
7845 raise errors.OpExecError("Can't activate the instance's disks")
7847 result = self.rpc.call_instance_start(target_node,
7848 (instance, None, None), False)
7849 msg = result.fail_msg
7851 _ShutdownInstanceDisks(self, instance)
7852 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7853 (instance.name, target_node, msg))
7856 class LUNodeMigrate(LogicalUnit):
7857 """Migrate all instances from a node.
7860 HPATH = "node-migrate"
7861 HTYPE = constants.HTYPE_NODE
7864 def CheckArguments(self):
7867 def ExpandNames(self):
7868 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7870 self.share_locks = _ShareAll()
7871 self.needed_locks = {
7872 locking.LEVEL_NODE: [self.op.node_name],
7875 def BuildHooksEnv(self):
7878 This runs on the master, the primary and all the secondaries.
7882 "NODE_NAME": self.op.node_name,
7883 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7886 def BuildHooksNodes(self):
7887 """Build hooks nodes.
7890 nl = [self.cfg.GetMasterNode()]
7893 def CheckPrereq(self):
7896 def Exec(self, feedback_fn):
7897 # Prepare jobs for migration instances
7898 allow_runtime_changes = self.op.allow_runtime_changes
7900 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7903 iallocator=self.op.iallocator,
7904 target_node=self.op.target_node,
7905 allow_runtime_changes=allow_runtime_changes,
7906 ignore_ipolicy=self.op.ignore_ipolicy)]
7907 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7910 # TODO: Run iallocator in this opcode and pass correct placement options to
7911 # OpInstanceMigrate. Since other jobs can modify the cluster between
7912 # running the iallocator and the actual migration, a good consistency model
7913 # will have to be found.
7915 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7916 frozenset([self.op.node_name]))
7918 return ResultWithJobs(jobs)
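# Illustrative note: this is what "gnt-node migrate <node>" ends up running --
# one separate OpInstanceMigrate job per primary instance of the node, so the
# individual migrations are scheduled (and can fail) independently of each other.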
7921 class TLMigrateInstance(Tasklet):
7922 """Tasklet class for instance migration.
7925 @ivar live: whether the migration will be done live or non-live;
7926 this variable is initialized only after CheckPrereq has run
7927 @type cleanup: boolean
7928 @ivar cleanup: Whether we clean up after a failed migration
7929 @type iallocator: string
7930 @ivar iallocator: The iallocator used to determine target_node
7931 @type target_node: string
7932 @ivar target_node: If given, the target_node to reallocate the instance to
7933 @type failover: boolean
7934 @ivar failover: Whether operation results in failover or migration
7935 @type fallback: boolean
7936 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7938 @type ignore_consistency: boolean
7939 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7941 @type shutdown_timeout: int
7942 @ivar shutdown_timeout: Timeout of the instance shutdown in case of failover
7943 @type ignore_ipolicy: bool
7944 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7949 _MIGRATION_POLL_INTERVAL = 1 # seconds
7950 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7952 def __init__(self, lu, instance_name, cleanup=False,
7953 failover=False, fallback=False,
7954 ignore_consistency=False,
7955 allow_runtime_changes=True,
7956 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7957 ignore_ipolicy=False):
7958 """Initializes this class.
7961 Tasklet.__init__(self, lu)
7964 self.instance_name = instance_name
7965 self.cleanup = cleanup
7966 self.live = False # will be overridden later
7967 self.failover = failover
7968 self.fallback = fallback
7969 self.ignore_consistency = ignore_consistency
7970 self.shutdown_timeout = shutdown_timeout
7971 self.ignore_ipolicy = ignore_ipolicy
7972 self.allow_runtime_changes = allow_runtime_changes
7974 def CheckPrereq(self):
7975 """Check prerequisites.
7977 This checks that the instance is in the cluster.
7980 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7981 instance = self.cfg.GetInstanceInfo(instance_name)
7982 assert instance is not None
7983 self.instance = instance
7984 cluster = self.cfg.GetClusterInfo()
7986 if (not self.cleanup and
7987 not instance.admin_state == constants.ADMINST_UP and
7988 not self.failover and self.fallback):
7989 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7990 " switching to failover")
7991 self.failover = True
7993 if instance.disk_template not in constants.DTS_MIRRORED:
7998 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7999 " %s" % (instance.disk_template, text),
8002 if instance.disk_template in constants.DTS_EXT_MIRROR:
8003 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8005 if self.lu.op.iallocator:
8006 self._RunAllocator()
8008 # We set self.target_node as it is required by
8010 self.target_node = self.lu.op.target_node
8012 # Check that the target node is correct in terms of instance policy
8013 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8014 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8015 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8016 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8017 ignore=self.ignore_ipolicy)
8019 # self.target_node is already populated, either directly or by the
8021 target_node = self.target_node
8022 if self.target_node == instance.primary_node:
8023 raise errors.OpPrereqError("Cannot migrate instance %s"
8024 " to its primary (%s)" %
8025 (instance.name, instance.primary_node))
8027 if len(self.lu.tasklets) == 1:
8028 # It is safe to release locks only when we're the only tasklet
8030 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8031 keep=[instance.primary_node, self.target_node])
8034 secondary_nodes = instance.secondary_nodes
8035 if not secondary_nodes:
8036 raise errors.ConfigurationError("No secondary node but using"
8037 " %s disk template" %
8038 instance.disk_template)
8039 target_node = secondary_nodes[0]
8040 if self.lu.op.iallocator or (self.lu.op.target_node and
8041 self.lu.op.target_node != target_node):
8043 text = "failed over"
8046 raise errors.OpPrereqError("Instances with disk template %s cannot"
8047 " be %s to arbitrary nodes"
8048 " (neither an iallocator nor a target"
8049 " node can be passed)" %
8050 (instance.disk_template, text),
8052 nodeinfo = self.cfg.GetNodeInfo(target_node)
8053 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8054 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8055 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8056 ignore=self.ignore_ipolicy)
8058 i_be = cluster.FillBE(instance)
8060 # check memory requirements on the secondary node
8061 if (not self.cleanup and
8062 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8063 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8064 "migrating instance %s" %
8066 i_be[constants.BE_MINMEM],
8067 instance.hypervisor)
8069 self.lu.LogInfo("Not checking memory on the secondary node as"
8070 " instance will not be started")
8072 # check if failover must be forced instead of migration
8073 if (not self.cleanup and not self.failover and
8074 i_be[constants.BE_ALWAYS_FAILOVER]):
8075 self.lu.LogInfo("Instance configured to always failover; fallback"
8077 self.failover = True
8079 # check bridge existence
8080 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8082 if not self.cleanup:
8083 _CheckNodeNotDrained(self.lu, target_node)
8084 if not self.failover:
8085 result = self.rpc.call_instance_migratable(instance.primary_node,
8087 if result.fail_msg and self.fallback:
8088 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8090 self.failover = True
8092 result.Raise("Can't migrate, please use failover",
8093 prereq=True, ecode=errors.ECODE_STATE)
8095 assert not (self.failover and self.cleanup)
8097 if not self.failover:
8098 if self.lu.op.live is not None and self.lu.op.mode is not None:
8099 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8100 " parameters are accepted",
8102 if self.lu.op.live is not None:
8104 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8106 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8107 # reset the 'live' parameter to None so that repeated
8108 # invocations of CheckPrereq do not raise an exception
8109 self.lu.op.live = None
8110 elif self.lu.op.mode is None:
8111 # read the default value from the hypervisor
8112 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8113 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8115 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8117 # Failover is never live
8120 if not (self.failover or self.cleanup):
8121 remote_info = self.rpc.call_instance_info(instance.primary_node,
8123 instance.hypervisor)
8124 remote_info.Raise("Error checking instance on node %s" %
8125 instance.primary_node)
8126 instance_running = bool(remote_info.payload)
8127 if instance_running:
8128 self.current_mem = int(remote_info.payload["memory"])
8130 def _RunAllocator(self):
8131 """Run the allocator based on input opcode.
8134 # FIXME: add a self.ignore_ipolicy option
8135 ial = IAllocator(self.cfg, self.rpc,
8136 mode=constants.IALLOCATOR_MODE_RELOC,
8137 name=self.instance_name,
8138 relocate_from=[self.instance.primary_node],
8141 ial.Run(self.lu.op.iallocator)
8144 raise errors.OpPrereqError("Can't compute nodes using"
8145 " iallocator '%s': %s" %
8146 (self.lu.op.iallocator, ial.info),
8148 if len(ial.result) != ial.required_nodes:
8149 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8150 " of nodes (%s), required %s" %
8151 (self.lu.op.iallocator, len(ial.result),
8152 ial.required_nodes), errors.ECODE_FAULT)
8153 self.target_node = ial.result[0]
8154 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8155 self.instance_name, self.lu.op.iallocator,
8156 utils.CommaJoin(ial.result))
8158 def _WaitUntilSync(self):
8159 """Poll with custom rpc for disk sync.
8161 This uses our own step-based rpc call.
8164 self.feedback_fn("* wait until resync is done")
8168 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8170 (self.instance.disks,
8173 for node, nres in result.items():
8174 nres.Raise("Cannot resync disks on node %s" % node)
8175 node_done, node_percent = nres.payload
8176 all_done = all_done and node_done
8177 if node_percent is not None:
8178 min_percent = min(min_percent, node_percent)
8180 if min_percent < 100:
8181 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8184 def _EnsureSecondary(self, node):
8185 """Demote a node to secondary.
8188 self.feedback_fn("* switching node %s to secondary mode" % node)
8190 for dev in self.instance.disks:
8191 self.cfg.SetDiskID(dev, node)
8193 result = self.rpc.call_blockdev_close(node, self.instance.name,
8194 self.instance.disks)
8195 result.Raise("Cannot change disk to secondary on node %s" % node)
8197 def _GoStandalone(self):
8198 """Disconnect from the network.
8201 self.feedback_fn("* changing into standalone mode")
8202 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8203 self.instance.disks)
8204 for node, nres in result.items():
8205 nres.Raise("Cannot disconnect disks node %s" % node)
8207 def _GoReconnect(self, multimaster):
8208 """Reconnect to the network.
8214 msg = "single-master"
8215 self.feedback_fn("* changing disks into %s mode" % msg)
8216 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8217 (self.instance.disks, self.instance),
8218 self.instance.name, multimaster)
8219 for node, nres in result.items():
8220 nres.Raise("Cannot change disks config on node %s" % node)
8222 def _ExecCleanup(self):
8223 """Try to cleanup after a failed migration.
8225 The cleanup is done by:
8226 - check that the instance is running only on one node
8227 (and update the config if needed)
8228 - change disks on its secondary node to secondary
8229 - wait until disks are fully synchronized
8230 - disconnect from the network
8231 - change disks into single-master mode
8232 - wait again until disks are fully synchronized
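
This is the code path behind "gnt-instance migrate --cleanup": LUInstanceMigrate
passes cleanup=True down into this tasklet, and Exec() dispatches here instead
of starting a new migration.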
8235 instance = self.instance
8236 target_node = self.target_node
8237 source_node = self.source_node
8239 # check running on only one node
8240 self.feedback_fn("* checking where the instance actually runs"
8241 " (if this hangs, the hypervisor might be in"
8243 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8244 for node, result in ins_l.items():
8245 result.Raise("Can't contact node %s" % node)
8247 runningon_source = instance.name in ins_l[source_node].payload
8248 runningon_target = instance.name in ins_l[target_node].payload
8250 if runningon_source and runningon_target:
8251 raise errors.OpExecError("Instance seems to be running on two nodes,"
8252 " or the hypervisor is confused; you will have"
8253 " to ensure manually that it runs only on one"
8254 " and restart this operation")
8256 if not (runningon_source or runningon_target):
8257 raise errors.OpExecError("Instance does not seem to be running at all;"
8258 " in this case it's safer to repair by"
8259 " running 'gnt-instance stop' to ensure disk"
8260 " shutdown, and then restarting it")
8262 if runningon_target:
8263 # the migration has actually succeeded, we need to update the config
8264 self.feedback_fn("* instance running on secondary node (%s),"
8265 " updating config" % target_node)
8266 instance.primary_node = target_node
8267 self.cfg.Update(instance, self.feedback_fn)
8268 demoted_node = source_node
8270 self.feedback_fn("* instance confirmed to be running on its"
8271 " primary node (%s)" % source_node)
8272 demoted_node = target_node
8274 if instance.disk_template in constants.DTS_INT_MIRROR:
8275 self._EnsureSecondary(demoted_node)
8277 self._WaitUntilSync()
8278 except errors.OpExecError:
8279 # we ignore errors here, since if the device is standalone, it
8280 # won't be able to sync
8282 self._GoStandalone()
8283 self._GoReconnect(False)
8284 self._WaitUntilSync()
8286 self.feedback_fn("* done")
8288 def _RevertDiskStatus(self):
8289 """Try to revert the disk status after a failed migration.
8292 target_node = self.target_node
8293 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8297 self._EnsureSecondary(target_node)
8298 self._GoStandalone()
8299 self._GoReconnect(False)
8300 self._WaitUntilSync()
8301 except errors.OpExecError, err:
8302 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8303 " please try to recover the instance manually;"
8304 " error '%s'" % str(err))
8306 def _AbortMigration(self):
8307 """Call the hypervisor code to abort a started migration.
8310 instance = self.instance
8311 target_node = self.target_node
8312 source_node = self.source_node
8313 migration_info = self.migration_info
8315 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8319 abort_msg = abort_result.fail_msg
8321 logging.error("Aborting migration failed on target node %s: %s",
8322 target_node, abort_msg)
8323 # Don't raise an exception here, as we still have to try to revert the
8324 # disk status, even if this step failed.
8326 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8327 instance, False, self.live)
8328 abort_msg = abort_result.fail_msg
8330 logging.error("Aborting migration failed on source node %s: %s",
8331 source_node, abort_msg)
8333 def _ExecMigration(self):
8334 """Migrate an instance.
8336 The migrate is done by:
8337 - change the disks into dual-master mode
8338 - wait until disks are fully synchronized again
8339 - migrate the instance
8340 - change disks on the new secondary node (the old primary) to secondary
8341 - wait until disks are fully synchronized
8342 - change disks into single-master mode
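
Note that for externally mirrored disk templates (constants.DTS_EXT_MIRROR,
e.g. RBD) the disk mode-switching steps are skipped; see the disk_template
checks further down in this method.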
8345 instance = self.instance
8346 target_node = self.target_node
8347 source_node = self.source_node
8349 # Check for hypervisor version mismatch and warn the user.
8350 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8351 None, [self.instance.hypervisor])
8352 for ninfo in nodeinfo.values():
8353 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8355 (_, _, (src_info, )) = nodeinfo[source_node].payload
8356 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8358 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8359 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8360 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8361 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8362 if src_version != dst_version:
8363 self.feedback_fn("* warning: hypervisor version mismatch between"
8364 " source (%s) and target (%s) node" %
8365 (src_version, dst_version))
8367 self.feedback_fn("* checking disk consistency between source and target")
8368 for (idx, dev) in enumerate(instance.disks):
8369 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8370 raise errors.OpExecError("Disk %s is degraded or not fully"
8371 " synchronized on target node,"
8372 " aborting migration" % idx)
8374 if self.current_mem > self.tgt_free_mem:
8375 if not self.allow_runtime_changes:
8376 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8377 " free memory to fit instance %s on target"
8378 " node %s (have %dMB, need %dMB)" %
8379 (instance.name, target_node,
8380 self.tgt_free_mem, self.current_mem))
8381 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8382 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8385 rpcres.Raise("Cannot modify instance runtime memory")
8387 # First get the migration information from the remote node
8388 result = self.rpc.call_migration_info(source_node, instance)
8389 msg = result.fail_msg
8391 log_err = ("Failed fetching source migration information from %s: %s" %
8393 logging.error(log_err)
8394 raise errors.OpExecError(log_err)
8396 self.migration_info = migration_info = result.payload
8398 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8399 # Then switch the disks to master/master mode
8400 self._EnsureSecondary(target_node)
8401 self._GoStandalone()
8402 self._GoReconnect(True)
8403 self._WaitUntilSync()
8405 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8406 result = self.rpc.call_accept_instance(target_node,
8409 self.nodes_ip[target_node])
8411 msg = result.fail_msg
8413 logging.error("Instance pre-migration failed, trying to revert"
8414 " disk status: %s", msg)
8415 self.feedback_fn("Pre-migration failed, aborting")
8416 self._AbortMigration()
8417 self._RevertDiskStatus()
8418 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8419 (instance.name, msg))
8421 self.feedback_fn("* migrating instance to %s" % target_node)
8422 result = self.rpc.call_instance_migrate(source_node, instance,
8423 self.nodes_ip[target_node],
8425 msg = result.fail_msg
8427 logging.error("Instance migration failed, trying to revert"
8428 " disk status: %s", msg)
8429 self.feedback_fn("Migration failed, aborting")
8430 self._AbortMigration()
8431 self._RevertDiskStatus()
8432 raise errors.OpExecError("Could not migrate instance %s: %s" %
8433 (instance.name, msg))
8435 self.feedback_fn("* starting memory transfer")
8436 last_feedback = time.time()
8438 result = self.rpc.call_instance_get_migration_status(source_node,
8440 msg = result.fail_msg
8441 ms = result.payload # MigrationStatus instance
8442 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8443 logging.error("Instance migration failed, trying to revert"
8444 " disk status: %s", msg)
8445 self.feedback_fn("Migration failed, aborting")
8446 self._AbortMigration()
8447 self._RevertDiskStatus()
8448 raise errors.OpExecError("Could not migrate instance %s: %s" %
8449 (instance.name, msg))
8451 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8452 self.feedback_fn("* memory transfer complete")
8455 if (utils.TimeoutExpired(last_feedback,
8456 self._MIGRATION_FEEDBACK_INTERVAL) and
8457 ms.transferred_ram is not None):
8458 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8459 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8460 last_feedback = time.time()
8462 time.sleep(self._MIGRATION_POLL_INTERVAL)
8464 result = self.rpc.call_instance_finalize_migration_src(source_node,
8468 msg = result.fail_msg
8470 logging.error("Instance migration succeeded, but finalization failed"
8471 " on the source node: %s", msg)
8472 raise errors.OpExecError("Could not finalize instance migration: %s" %
8475 instance.primary_node = target_node
8477 # distribute new instance config to the other nodes
8478 self.cfg.Update(instance, self.feedback_fn)
8480 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8484 msg = result.fail_msg
8486 logging.error("Instance migration succeeded, but finalization failed"
8487 " on the target node: %s", msg)
8488 raise errors.OpExecError("Could not finalize instance migration: %s" %
8491 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8492 self._EnsureSecondary(source_node)
8493 self._WaitUntilSync()
8494 self._GoStandalone()
8495 self._GoReconnect(False)
8496 self._WaitUntilSync()
8498 # If the instance's disk template is `rbd' and there was a successful
8499 # migration, unmap the device from the source node.
8500 if self.instance.disk_template == constants.DT_RBD:
8501 disks = _ExpandCheckDisks(instance, instance.disks)
8502 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8504 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8505 msg = result.fail_msg
8507 logging.error("Migration was successful, but couldn't unmap the"
8508 " block device %s on source node %s: %s",
8509 disk.iv_name, source_node, msg)
8510 logging.error("You need to unmap the device %s manually on %s",
8511 disk.iv_name, source_node)
8513 self.feedback_fn("* done")
8515 def _ExecFailover(self):
8516 """Failover an instance.
8518 The failover is done by shutting it down on its present node and
8519 starting it on the secondary.
8522 instance = self.instance
8523 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8525 source_node = instance.primary_node
8526 target_node = self.target_node
8528 if instance.admin_state == constants.ADMINST_UP:
8529 self.feedback_fn("* checking disk consistency between source and target")
8530 for (idx, dev) in enumerate(instance.disks):
8531 # for drbd, these are drbd over lvm
8532 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8534 if primary_node.offline:
8535 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8537 (primary_node.name, idx, target_node))
8538 elif not self.ignore_consistency:
8539 raise errors.OpExecError("Disk %s is degraded on target node,"
8540 " aborting failover" % idx)
8542 self.feedback_fn("* not checking disk consistency as instance is not"
8545 self.feedback_fn("* shutting down instance on source node")
8546 logging.info("Shutting down instance %s on node %s",
8547 instance.name, source_node)
8549 result = self.rpc.call_instance_shutdown(source_node, instance,
8550 self.shutdown_timeout)
8551 msg = result.fail_msg
8553 if self.ignore_consistency or primary_node.offline:
8554 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8555 " proceeding anyway; please make sure node"
8556 " %s is down; error details: %s",
8557 instance.name, source_node, source_node, msg)
8559 raise errors.OpExecError("Could not shutdown instance %s on"
8561 (instance.name, source_node, msg))
8563 self.feedback_fn("* deactivating the instance's disks on source node")
8564 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8565 raise errors.OpExecError("Can't shut down the instance's disks")
8567 instance.primary_node = target_node
8568 # distribute new instance config to the other nodes
8569 self.cfg.Update(instance, self.feedback_fn)
8571 # Only start the instance if it's marked as up
8572 if instance.admin_state == constants.ADMINST_UP:
8573 self.feedback_fn("* activating the instance's disks on target node %s" %
8575 logging.info("Starting instance %s on node %s",
8576 instance.name, target_node)
8578 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8579 ignore_secondaries=True)
8581 _ShutdownInstanceDisks(self.lu, instance)
8582 raise errors.OpExecError("Can't activate the instance's disks")
8584 self.feedback_fn("* starting the instance on the target node %s" %
8586 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8588 msg = result.fail_msg
8590 _ShutdownInstanceDisks(self.lu, instance)
8591 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8592 (instance.name, target_node, msg))
8594 def Exec(self, feedback_fn):
8595 """Perform the migration.
8598 self.feedback_fn = feedback_fn
8599 self.source_node = self.instance.primary_node
8601 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8602 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8603 self.target_node = self.instance.secondary_nodes[0]
8604 # Otherwise self.target_node has been populated either
8605 # directly, or through an iallocator.
8607 self.all_nodes = [self.source_node, self.target_node]
8608 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8609 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8612 feedback_fn("Failover instance %s" % self.instance.name)
8613 self._ExecFailover()
8615 feedback_fn("Migrating instance %s" % self.instance.name)
8618 return self._ExecCleanup()
8620 return self._ExecMigration()
8623 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8625 """Wrapper around L{_CreateBlockDevInner}.
8627 This method annotates the root device first.
8630 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8631 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8635 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8637 """Create a tree of block devices on a given node.
8639 If this device type has to be created on secondaries, create it and
8642 If not, just recurse to children keeping the same 'force' value.
8644 @attention: The device has to be annotated already.
8646 @param lu: the lu on whose behalf we execute
8647 @param node: the node on which to create the device
8648 @type instance: L{objects.Instance}
8649 @param instance: the instance which owns the device
8650 @type device: L{objects.Disk}
8651 @param device: the device to create
8652 @type force_create: boolean
8653 @param force_create: whether to force creation of this device; this
8654 will be changed to True whenever we find a device which has
8655 CreateOnSecondary() attribute
8656 @param info: the extra 'metadata' we should attach to the device
8657 (this will be represented as a LVM tag)
8658 @type force_open: boolean
8659 @param force_open: this parameter will be passed to the
8660 L{backend.BlockdevCreate} function where it specifies
8661 whether we run on primary or not, and it affects both
8662 the child assembly and the device's own Open() execution
8665 if device.CreateOnSecondary():
8669 for child in device.children:
8670 _CreateBlockDevInner(lu, node, instance, child, force_create,
8673 if not force_create:
8676 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
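# Illustrative walk-through (based on the callers visible in this file):
# _CreateDisks() invokes this with force_create=True only on the instance's
# primary node, so on a secondary node a device subtree is only created if
# some device in it answers CreateOnSecondary(); that flag is then propagated
# down to all of its children via force_create.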
8679 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8680 """Create a single block device on a given node.
8682 This will not recurse over children of the device, so they must be created in advance.
8685 @param lu: the lu on whose behalf we execute
8686 @param node: the node on which to create the device
8687 @type instance: L{objects.Instance}
8688 @param instance: the instance which owns the device
8689 @type device: L{objects.Disk}
8690 @param device: the device to create
8691 @param info: the extra 'metadata' we should attach to the device
8692 (this will be represented as a LVM tag)
8693 @type force_open: boolean
8694 @param force_open: this parameter will be passed to the
8695 L{backend.BlockdevCreate} function where it specifies
8696 whether we run on primary or not, and it affects both
8697 the child assembly and the device's own Open() execution
8700 lu.cfg.SetDiskID(device, node)
8701 result = lu.rpc.call_blockdev_create(node, device, device.size,
8702 instance.name, force_open, info)
8703 result.Raise("Can't create block device %s on"
8704 " node %s for instance %s" % (device, node, instance.name))
8705 if device.physical_id is None:
8706 device.physical_id = result.payload
8709 def _GenerateUniqueNames(lu, exts):
8710 """Generate a suitable LV name.
8712 This will generate a logical volume name for the given instance.
8717 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8718 results.append("%s%s" % (new_id, val))
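# Illustrative result: for exts == [".disk0", ".disk1"] this returns one name
# per extension of the form "<generated-uuid>.disk0", "<generated-uuid>.disk1";
# the DRBD code path below then appends "_data"/"_meta" to such prefixes.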
8722 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8723 iv_name, p_minor, s_minor):
8724 """Generate a drbd8 device complete with its children.
8727 assert len(vgnames) == len(names) == 2
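# Illustrative sketch of the device tree assembled below for one DRBD8 disk:
#
#   LD_DRBD8 (size=size, logical_id=(primary, secondary, port, p_minor,
#             s_minor, shared_secret))
#     +-- LD_LV names[0] on vgnames[0]   (data volume, size=size)
#     +-- LD_LV names[1] on vgnames[1]   (metadata volume, size=DRBD_META_SIZE)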
8728 port = lu.cfg.AllocatePort()
8729 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8731 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8732 logical_id=(vgnames[0], names[0]),
8734 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8735 logical_id=(vgnames[1], names[1]),
8737 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8738 logical_id=(primary, secondary, port,
8741 children=[dev_data, dev_meta],
8742 iv_name=iv_name, params={})
8746 _DISK_TEMPLATE_NAME_PREFIX = {
8747 constants.DT_PLAIN: "",
8748 constants.DT_RBD: ".rbd",
8752 _DISK_TEMPLATE_DEVICE_TYPE = {
8753 constants.DT_PLAIN: constants.LD_LV,
8754 constants.DT_FILE: constants.LD_FILE,
8755 constants.DT_SHARED_FILE: constants.LD_FILE,
8756 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8757 constants.DT_RBD: constants.LD_RBD,
8761 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8762 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8763 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8764 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8765 """Generate the entire disk layout for a given template type.
8768 #TODO: compute space requirements
8770 vgname = lu.cfg.GetVGName()
8771 disk_count = len(disk_info)
8774 if template_name == constants.DT_DISKLESS:
8776 elif template_name == constants.DT_DRBD8:
8777 if len(secondary_nodes) != 1:
8778 raise errors.ProgrammerError("Wrong template configuration")
8779 remote_node = secondary_nodes[0]
8780 minors = lu.cfg.AllocateDRBDMinor(
8781 [primary_node, remote_node] * len(disk_info), instance_name)
8783 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8785 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8788 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8789 for i in range(disk_count)]):
8790 names.append(lv_prefix + "_data")
8791 names.append(lv_prefix + "_meta")
8792 for idx, disk in enumerate(disk_info):
8793 disk_index = idx + base_index
8794 data_vg = disk.get(constants.IDISK_VG, vgname)
8795 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8796 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8797 disk[constants.IDISK_SIZE],
8799 names[idx * 2:idx * 2 + 2],
8800 "disk/%d" % disk_index,
8801 minors[idx * 2], minors[idx * 2 + 1])
8802 disk_dev.mode = disk[constants.IDISK_MODE]
8803 disks.append(disk_dev)
8806 raise errors.ProgrammerError("Wrong template configuration")
8808 if template_name == constants.DT_FILE:
8810 elif template_name == constants.DT_SHARED_FILE:
8811 _req_shr_file_storage()
8813 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8814 if name_prefix is None:
8817 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8818 (name_prefix, base_index + i)
8819 for i in range(disk_count)])
8821 if template_name == constants.DT_PLAIN:
8822 def logical_id_fn(idx, _, disk):
8823 vg = disk.get(constants.IDISK_VG, vgname)
8824 return (vg, names[idx])
8825 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8827 lambda _, disk_index, disk: (file_driver,
8828 "%s/disk%d" % (file_storage_dir,
8830 elif template_name == constants.DT_BLOCK:
8832 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8833 disk[constants.IDISK_ADOPT])
8834 elif template_name == constants.DT_RBD:
8835 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8837 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8839 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8841 for idx, disk in enumerate(disk_info):
8842 disk_index = idx + base_index
8843 size = disk[constants.IDISK_SIZE]
8844 feedback_fn("* disk %s, size %s" %
8845 (disk_index, utils.FormatUnit(size, "h")))
8846 disks.append(objects.Disk(dev_type=dev_type, size=size,
8847 logical_id=logical_id_fn(idx, disk_index, disk),
8848 iv_name="disk/%d" % disk_index,
8849 mode=disk[constants.IDISK_MODE],
8855 def _GetInstanceInfoText(instance):
8856 Compute the text that should be added to the disk's metadata.
8859 return "originstname+%s" % instance.name
8862 def _CalcEta(time_taken, written, total_size):
8863 """Calculates the ETA based on size written and total size.
8865 @param time_taken: The time taken so far
8866 @param written: amount written so far
8867 @param total_size: The total size of data to be written
8868 @return: The remaining time in seconds
8871 avg_time = time_taken / float(written)
8872 return (total_size - written) * avg_time
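# Worked example (hypothetical numbers, added for illustration): if 2048 units
# out of 8192 were written in 60 seconds, avg_time is 60/2048, roughly 0.0293
# seconds per unit, so the ETA is (8192 - 2048) * 0.0293, roughly 180 seconds.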
8875 def _WipeDisks(lu, instance):
8876 """Wipes instance disks.
8878 @type lu: L{LogicalUnit}
8879 @param lu: the logical unit on whose behalf we execute
8880 @type instance: L{objects.Instance}
8881 @param instance: the instance whose disks we should create
8882 @return: the success of the wipe
8885 node = instance.primary_node
8887 for device in instance.disks:
8888 lu.cfg.SetDiskID(device, node)
8890 logging.info("Pause sync of instance %s disks", instance.name)
8891 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8892 (instance.disks, instance),
8894 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8896 for idx, success in enumerate(result.payload):
8898 logging.warn("pause-sync of instance %s for disk %d failed",
8902 for idx, device in enumerate(instance.disks):
8903 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8904 # at most MAX_WIPE_CHUNK
8905 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8906 constants.MIN_WIPE_CHUNK_PERCENT)
8907 # we _must_ make this an int, otherwise rounding errors will
8909 wipe_chunk_size = int(wipe_chunk_size)
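# Illustrative calculation (constant values assumed, see constants.py): for a
# 102400 MB disk and MIN_WIPE_CHUNK_PERCENT == 10, the raw chunk would be
# 10240 MB; min() then caps it at MAX_WIPE_CHUNK (e.g. 1024 MB), so the disk
# is wiped in roughly 1 GB steps.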
8911 lu.LogInfo("* Wiping disk %d", idx)
8912 logging.info("Wiping disk %d for instance %s, node %s using"
8913 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8918 start_time = time.time()
8920 while offset < size:
8921 wipe_size = min(wipe_chunk_size, size - offset)
8922 logging.debug("Wiping disk %d, offset %s, chunk %s",
8923 idx, offset, wipe_size)
8924 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8926 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8927 (idx, offset, wipe_size))
8930 if now - last_output >= 60:
8931 eta = _CalcEta(now - start_time, offset, size)
8932 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8933 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8936 logging.info("Resume sync of instance %s disks", instance.name)
8938 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8939 (instance.disks, instance),
8943 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8944 " please have a look at the status and troubleshoot"
8945 " the issue: %s", node, result.fail_msg)
8947 for idx, success in enumerate(result.payload):
8949 lu.LogWarning("Resume sync of disk %d failed, please have a"
8950 " look at the status and troubleshoot the issue", idx)
8951 logging.warn("resume-sync of instance %s for disk %d failed",
8955 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8956 """Create all disks for an instance.
8958 This abstracts away some work from AddInstance.
8960 @type lu: L{LogicalUnit}
8961 @param lu: the logical unit on whose behalf we execute
8962 @type instance: L{objects.Instance}
8963 @param instance: the instance whose disks we should create
8965 @param to_skip: list of indices to skip
8966 @type target_node: string
8967 @param target_node: if passed, overrides the target node for creation
8969 @return: the success of the creation
8972 info = _GetInstanceInfoText(instance)
8973 if target_node is None:
8974 pnode = instance.primary_node
8975 all_nodes = instance.all_nodes
8980 if instance.disk_template in constants.DTS_FILEBASED:
8981 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8982 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8984 result.Raise("Failed to create directory '%s' on"
8985 " node %s" % (file_storage_dir, pnode))
8987 # Note: this needs to be kept in sync with adding of disks in
8988 # LUInstanceSetParams
8989 for idx, device in enumerate(instance.disks):
8990 if to_skip and idx in to_skip:
8992 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8994 for node in all_nodes:
8995 f_create = node == pnode
8996 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8999 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9000 """Remove all disks for an instance.
9002 This abstracts away some work from `AddInstance()` and
9003 `RemoveInstance()`. Note that in case some of the devices couldn't
9004 be removed, the removal will continue with the other ones (compare
9005 with `_CreateDisks()`).
9007 @type lu: L{LogicalUnit}
9008 @param lu: the logical unit on whose behalf we execute
9009 @type instance: L{objects.Instance}
9010 @param instance: the instance whose disks we should remove
9011 @type target_node: string
9012 @param target_node: used to override the node on which to remove the disks
9014 @return: the success of the removal
9017 logging.info("Removing block devices for instance %s", instance.name)
9020 ports_to_release = set()
9021 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9022 for (idx, device) in enumerate(anno_disks):
9024 edata = [(target_node, device)]
9026 edata = device.ComputeNodeTree(instance.primary_node)
9027 for node, disk in edata:
9028 lu.cfg.SetDiskID(disk, node)
9029 result = lu.rpc.call_blockdev_remove(node, disk)
9031 lu.LogWarning("Could not remove disk %s on node %s,"
9032 " continuing anyway: %s", idx, node, result.fail_msg)
9033 if not (result.offline and node != instance.primary_node):
9036 # if this is a DRBD disk, return its port to the pool
9037 if device.dev_type in constants.LDS_DRBD:
9038 ports_to_release.add(device.logical_id[2])
9040 if all_result or ignore_failures:
9041 for port in ports_to_release:
9042 lu.cfg.AddTcpUdpPort(port)
9044 if instance.disk_template == constants.DT_FILE:
9045 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9049 tgt = instance.primary_node
9050 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9052 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9053 file_storage_dir, instance.primary_node, result.fail_msg)
9059 def _ComputeDiskSizePerVG(disk_template, disks):
9060 """Compute disk size requirements in the volume group
9063 def _compute(disks, payload):
9064 """Universal algorithm.
9069 vgs[disk[constants.IDISK_VG]] = \
9070 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9074 # Required free disk space as a function of disk and swap space
9076 constants.DT_DISKLESS: {},
9077 constants.DT_PLAIN: _compute(disks, 0),
9078 # 128 MB are added for drbd metadata for each disk
9079 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9080 constants.DT_FILE: {},
9081 constants.DT_SHARED_FILE: {},
9084 if disk_template not in req_size_dict:
9085 raise errors.ProgrammerError("Disk template '%s' size requirement"
9086 " is unknown" % disk_template)
9088 return req_size_dict[disk_template]
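# Illustrative example (hypothetical input): for disk_template=constants.DT_DRBD8
# and disks=[{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 512}]
# the result is {"xenvg": 1024 + 512 + 2 * DRBD_META_SIZE}, i.e. each DRBD
# disk also accounts for its metadata volume in that volume group.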
9091 def _ComputeDiskSize(disk_template, disks):
9092 """Compute disk size requirements according to disk template
9095 # Required free disk space as a function of disk and swap space
9097 constants.DT_DISKLESS: None,
9098 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9099 # 128 MB are added for drbd metadata for each disk
9101 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9102 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9103 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9104 constants.DT_BLOCK: 0,
9105 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9108 if disk_template not in req_size_dict:
9109 raise errors.ProgrammerError("Disk template '%s' size requirement"
9110 " is unknown" % disk_template)
9112 return req_size_dict[disk_template]
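# Illustrative example (hypothetical input): two disks of 1024 and 2048
# megabytes need 3072 MB with constants.DT_PLAIN, 3072 + 2 * DRBD_META_SIZE MB
# with constants.DT_DRBD8, 0 MB with constants.DT_BLOCK (adopted devices), and
# None (no local space) with constants.DT_DISKLESS.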
9115 def _FilterVmNodes(lu, nodenames):
9116 """Filters out non-vm_capable nodes from a list.
9118 @type lu: L{LogicalUnit}
9119 @param lu: the logical unit for which we check
9120 @type nodenames: list
9121 @param nodenames: the list of nodes on which we should check
9123 @return: the list of vm-capable nodes
9126 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9127 return [name for name in nodenames if name not in non_vm_nodes]
9130 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9131 """Hypervisor parameter validation.
9133 This function abstracts the hypervisor parameter validation to be
9134 used in both instance create and instance modify.
9136 @type lu: L{LogicalUnit}
9137 @param lu: the logical unit for which we check
9138 @type nodenames: list
9139 @param nodenames: the list of nodes on which we should check
9140 @type hvname: string
9141 @param hvname: the name of the hypervisor we should use
9142 @type hvparams: dict
9143 @param hvparams: the parameters which we need to check
9144 @raise errors.OpPrereqError: if the parameters are not valid
9147 nodenames = _FilterVmNodes(lu, nodenames)
9149 cluster = lu.cfg.GetClusterInfo()
9150 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9152 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9153 for node in nodenames:
9157 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9160 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9161 """OS parameters validation.
9163 @type lu: L{LogicalUnit}
9164 @param lu: the logical unit for which we check
9165 @type required: boolean
9166 @param required: whether the validation should fail if the OS is not found
9168 @type nodenames: list
9169 @param nodenames: the list of nodes on which we should check
9170 @type osname: string
9171 @param osname: the name of the OS we should use
9172 @type osparams: dict
9173 @param osparams: the parameters which we need to check
9174 @raise errors.OpPrereqError: if the parameters are not valid
9177 nodenames = _FilterVmNodes(lu, nodenames)
9178 result = lu.rpc.call_os_validate(nodenames, required, osname,
9179 [constants.OS_VALIDATE_PARAMETERS],
9181 for node, nres in result.items():
9182 # we don't check for offline cases since this should be run only
9183 # against the master node and/or an instance's nodes
9184 nres.Raise("OS Parameters validation failed on node %s" % node)
9185 if not nres.payload:
9186 lu.LogInfo("OS %s not found on node %s, validation skipped",
9190 class LUInstanceCreate(LogicalUnit):
9191 """Create an instance.
9194 HPATH = "instance-add"
9195 HTYPE = constants.HTYPE_INSTANCE
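# A minimal client-side sketch (parameter set assumed from this module's usage;
# consult opcodes.py for the authoritative list): creating a plain-LVM instance
# typically means submitting something like
#   opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                            mode=constants.INSTANCE_CREATE,
#                            disk_template=constants.DT_PLAIN,
#                            disks=[{constants.IDISK_SIZE: 10240}],
#                            nics=[{}], os_type="debootstrap",
#                            pnode="node1.example.com")
# CheckArguments below validates exactly this kind of input.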
9198 def CheckArguments(self):
9202 # do not require name_check to ease forward/backward compatibility
9204 if self.op.no_install and self.op.start:
9205 self.LogInfo("No-installation mode selected, disabling startup")
9206 self.op.start = False
9207 # validate/normalize the instance name
9208 self.op.instance_name = \
9209 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9211 if self.op.ip_check and not self.op.name_check:
9212 # TODO: make the ip check more flexible and not depend on the name check
9213 raise errors.OpPrereqError("Cannot do IP address check without a name"
9214 " check", errors.ECODE_INVAL)
9216 # check nics' parameter names
9217 for nic in self.op.nics:
9218 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9220 # check disks. parameter names and consistent adopt/no-adopt strategy
9221 has_adopt = has_no_adopt = False
9222 for disk in self.op.disks:
9223 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9224 if constants.IDISK_ADOPT in disk:
9228 if has_adopt and has_no_adopt:
9229 raise errors.OpPrereqError("Either all disks are adopted or none is",
9232 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9233 raise errors.OpPrereqError("Disk adoption is not supported for the"
9234 " '%s' disk template" %
9235 self.op.disk_template,
9237 if self.op.iallocator is not None:
9238 raise errors.OpPrereqError("Disk adoption not allowed with an"
9239 " iallocator script", errors.ECODE_INVAL)
9240 if self.op.mode == constants.INSTANCE_IMPORT:
9241 raise errors.OpPrereqError("Disk adoption not allowed for"
9242 " instance import", errors.ECODE_INVAL)
9244 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9245 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9246 " but no 'adopt' parameter given" %
9247 self.op.disk_template,
9250 self.adopt_disks = has_adopt
9252 # instance name verification
9253 if self.op.name_check:
9254 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9255 self.op.instance_name = self.hostname1.name
9256 # used in CheckPrereq for ip ping check
9257 self.check_ip = self.hostname1.ip
9259 self.check_ip = None
9261 # file storage checks
9262 if (self.op.file_driver and
9263 not self.op.file_driver in constants.FILE_DRIVER):
9264 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9265 self.op.file_driver, errors.ECODE_INVAL)
9267 if self.op.disk_template == constants.DT_FILE:
9268 opcodes.RequireFileStorage()
9269 elif self.op.disk_template == constants.DT_SHARED_FILE:
9270 opcodes.RequireSharedFileStorage()
9272 ### Node/iallocator related checks
9273 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9275 if self.op.pnode is not None:
9276 if self.op.disk_template in constants.DTS_INT_MIRROR:
9277 if self.op.snode is None:
9278 raise errors.OpPrereqError("The networked disk templates need"
9279 " a mirror node", errors.ECODE_INVAL)
9281 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9283 self.op.snode = None
9285 self._cds = _GetClusterDomainSecret()
9287 if self.op.mode == constants.INSTANCE_IMPORT:
9288 # On import force_variant must be True, because if we forced it at
9289 # initial install, our only chance when importing it back is that it
9291 self.op.force_variant = True
9293 if self.op.no_install:
9294 self.LogInfo("No-installation mode has no effect during import")
9296 elif self.op.mode == constants.INSTANCE_CREATE:
9297 if self.op.os_type is None:
9298 raise errors.OpPrereqError("No guest OS specified",
9300 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9301 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9302 " installation" % self.op.os_type,
9304 if self.op.disk_template is None:
9305 raise errors.OpPrereqError("No disk template specified",
9308 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9309 # Check handshake to ensure both clusters have the same domain secret
9310 src_handshake = self.op.source_handshake
9311 if not src_handshake:
9312 raise errors.OpPrereqError("Missing source handshake",
9315 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9318 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9321 # Load and check source CA
9322 self.source_x509_ca_pem = self.op.source_x509_ca
9323 if not self.source_x509_ca_pem:
9324 raise errors.OpPrereqError("Missing source X509 CA",
9328 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9330 except OpenSSL.crypto.Error, err:
9331 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9332 (err, ), errors.ECODE_INVAL)
9334 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9335 if errcode is not None:
9336 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9339 self.source_x509_ca = cert
9341 src_instance_name = self.op.source_instance_name
9342 if not src_instance_name:
9343 raise errors.OpPrereqError("Missing source instance name",
9346 self.source_instance_name = \
9347 netutils.GetHostname(name=src_instance_name).name
9350 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9351 self.op.mode, errors.ECODE_INVAL)
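  # Illustrative sketch (not wired into the LU): the all-or-nothing adoption
  # rule checked above, restated over plain disk dictionaries. The helper name
  # and the literal "adopt"/"size" keys are hypothetical stand-ins for the real
  # IDISK_* constants.
  def _ExampleAdoptionIsConsistent(disks):
    """Return True iff either every disk or no disk requests adoption."""
    adopted = [d for d in disks if "adopt" in d]
    return len(adopted) in (0, len(disks))

  # Example: mixing adopted and newly-created disks is rejected.
  #   _ExampleAdoptionIsConsistent([{"size": 1024}, {"size": 2048}])      -> True
  #   _ExampleAdoptionIsConsistent([{"adopt": "xenvg/lv0"}, {"size": 1}]) -> False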
9353 def ExpandNames(self):
9354 """ExpandNames for CreateInstance.
9356 Figure out the right locks for instance creation.
9359 self.needed_locks = {}
9361 instance_name = self.op.instance_name
9362 # this is just a preventive check, but someone might still add this
9363 # instance in the meantime, and creation will fail at lock-add time
9364 if instance_name in self.cfg.GetInstanceList():
9365 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9366 instance_name, errors.ECODE_EXISTS)
9368 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9370 if self.op.iallocator:
9371 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9372 # specifying a group on instance creation and then selecting nodes from
9374 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9375 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9377 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9378 nodelist = [self.op.pnode]
9379 if self.op.snode is not None:
9380 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9381 nodelist.append(self.op.snode)
9382 self.needed_locks[locking.LEVEL_NODE] = nodelist
9383 # Lock resources of instance's primary and secondary nodes (copy to
9384       # prevent accidental modification)
9385 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9387 # in case of import lock the source node too
9388 if self.op.mode == constants.INSTANCE_IMPORT:
9389 src_node = self.op.src_node
9390 src_path = self.op.src_path
9392 if src_path is None:
9393 self.op.src_path = src_path = self.op.instance_name
9395 if src_node is None:
9396 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9397 self.op.src_node = None
9398 if os.path.isabs(src_path):
9399 raise errors.OpPrereqError("Importing an instance from a path"
9400 " requires a source node option",
9403 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9404 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9405 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9406 if not os.path.isabs(src_path):
9407 self.op.src_path = src_path = \
9408 utils.PathJoin(constants.EXPORT_DIR, src_path)
9410 def _RunAllocator(self):
9411 """Run the allocator based on input opcode.
9414 nics = [n.ToDict() for n in self.nics]
9415 ial = IAllocator(self.cfg, self.rpc,
9416 mode=constants.IALLOCATOR_MODE_ALLOC,
9417 name=self.op.instance_name,
9418 disk_template=self.op.disk_template,
9421 vcpus=self.be_full[constants.BE_VCPUS],
9422 memory=self.be_full[constants.BE_MAXMEM],
9423 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9426 hypervisor=self.op.hypervisor,
9429 ial.Run(self.op.iallocator)
9432 raise errors.OpPrereqError("Can't compute nodes using"
9433 " iallocator '%s': %s" %
9434 (self.op.iallocator, ial.info),
9436 if len(ial.result) != ial.required_nodes:
9437 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9438 " of nodes (%s), required %s" %
9439 (self.op.iallocator, len(ial.result),
9440 ial.required_nodes), errors.ECODE_FAULT)
9441 self.op.pnode = ial.result[0]
9442 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9443 self.op.instance_name, self.op.iallocator,
9444 utils.CommaJoin(ial.result))
9445 if ial.required_nodes == 2:
9446 self.op.snode = ial.result[1]
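  # Illustrative sketch (not used by the code above): the result validation
  # that _RunAllocator performs, on a plain list of node names. "required"
  # mirrors ial.required_nodes; the function name is hypothetical.
  def _ExampleValidateAllocation(result, required):
    """Return (pnode, snode or None) or raise ValueError on a bad result."""
    if len(result) != required:
      raise ValueError("allocator returned %d node(s), %d required" %
                       (len(result), required))
    if required == 2:
      return (result[0], result[1])
    return (result[0], None)

  # Example: a DRBD instance needs two nodes, a plain one only needs one.
  #   _ExampleValidateAllocation(["node1", "node2"], 2) -> ("node1", "node2")
  #   _ExampleValidateAllocation(["node1"], 2)          -> raises ValueError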
9448 def BuildHooksEnv(self):
9451 This runs on master, primary and secondary nodes of the instance.
9455 "ADD_MODE": self.op.mode,
9457 if self.op.mode == constants.INSTANCE_IMPORT:
9458 env["SRC_NODE"] = self.op.src_node
9459 env["SRC_PATH"] = self.op.src_path
9460 env["SRC_IMAGES"] = self.src_images
9462 env.update(_BuildInstanceHookEnv(
9463 name=self.op.instance_name,
9464 primary_node=self.op.pnode,
9465 secondary_nodes=self.secondaries,
9466 status=self.op.start,
9467 os_type=self.op.os_type,
9468 minmem=self.be_full[constants.BE_MINMEM],
9469 maxmem=self.be_full[constants.BE_MAXMEM],
9470 vcpus=self.be_full[constants.BE_VCPUS],
9471 nics=_NICListToTuple(self, self.nics),
9472 disk_template=self.op.disk_template,
9473 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9474 for d in self.disks],
9477 hypervisor_name=self.op.hypervisor,
9483 def BuildHooksNodes(self):
9484 """Build hooks nodes.
9487 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9490 def _ReadExportInfo(self):
9491 """Reads the export information from disk.
9493 It will override the opcode source node and path with the actual
9494 information, if these two were not specified before.
9496 @return: the export information
9499 assert self.op.mode == constants.INSTANCE_IMPORT
9501 src_node = self.op.src_node
9502 src_path = self.op.src_path
9504 if src_node is None:
9505 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9506 exp_list = self.rpc.call_export_list(locked_nodes)
9508 for node in exp_list:
9509 if exp_list[node].fail_msg:
9511 if src_path in exp_list[node].payload:
9513 self.op.src_node = src_node = node
9514 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9518 raise errors.OpPrereqError("No export found for relative path %s" %
9519 src_path, errors.ECODE_INVAL)
9521 _CheckNodeOnline(self, src_node)
9522 result = self.rpc.call_export_info(src_node, src_path)
9523 result.Raise("No export or invalid export found in dir %s" % src_path)
9525 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9526 if not export_info.has_section(constants.INISECT_EXP):
9527 raise errors.ProgrammerError("Corrupted export config",
9528 errors.ECODE_ENVIRON)
9530 ei_version = export_info.get(constants.INISECT_EXP, "version")
9531 if (int(ei_version) != constants.EXPORT_VERSION):
9532 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9533 (ei_version, constants.EXPORT_VERSION),
9534 errors.ECODE_ENVIRON)
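  # Illustrative sketch (independent of the RPC layer): the export data parsed
  # above is a plain INI document, so the version check can be reproduced with
  # only the standard library. The "export"/"version" names and the expected
  # version 0 are hard-coded here for the example only.
  def _ExampleCheckExportVersion(export_text, wanted_version=0):
    """Parse an export INI string and verify its version field."""
    import ConfigParser
    import StringIO
    parser = ConfigParser.SafeConfigParser()
    parser.readfp(StringIO.StringIO(export_text))
    if not parser.has_section("export"):
      raise ValueError("Corrupted export config")
    version = parser.getint("export", "version")
    if version != wanted_version:
      raise ValueError("Wrong export version %s (wanted %d)" %
                       (version, wanted_version))
    return parser

  # Example input:
  #   [export]
  #   version = 0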
9537 def _ReadExportParams(self, einfo):
9538 """Use export parameters as defaults.
9540 In case the opcode doesn't specify (as in override) some instance
9541 parameters, then try to use them from the export information, if
9545 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9547 if self.op.disk_template is None:
9548 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9549 self.op.disk_template = einfo.get(constants.INISECT_INS,
9551 if self.op.disk_template not in constants.DISK_TEMPLATES:
9552 raise errors.OpPrereqError("Disk template specified in configuration"
9553 " file is not one of the allowed values:"
9554 " %s" % " ".join(constants.DISK_TEMPLATES))
9556 raise errors.OpPrereqError("No disk template specified and the export"
9557 " is missing the disk_template information",
9560 if not self.op.disks:
9562 # TODO: import the disk iv_name too
9563 for idx in range(constants.MAX_DISKS):
9564 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9565 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9566 disks.append({constants.IDISK_SIZE: disk_sz})
9567 self.op.disks = disks
9568 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9569 raise errors.OpPrereqError("No disk info specified and the export"
9570 " is missing the disk information",
9573 if not self.op.nics:
9575 for idx in range(constants.MAX_NICS):
9576 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9578 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9579 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9586 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9587 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9589 if (self.op.hypervisor is None and
9590 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9591 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9593 if einfo.has_section(constants.INISECT_HYP):
9594 # use the export parameters but do not override the ones
9595 # specified by the user
9596 for name, value in einfo.items(constants.INISECT_HYP):
9597 if name not in self.op.hvparams:
9598 self.op.hvparams[name] = value
9600 if einfo.has_section(constants.INISECT_BEP):
9601 # use the parameters, without overriding
9602 for name, value in einfo.items(constants.INISECT_BEP):
9603 if name not in self.op.beparams:
9604 self.op.beparams[name] = value
9605 # Compatibility for the old "memory" be param
9606 if name == constants.BE_MEMORY:
9607 if constants.BE_MAXMEM not in self.op.beparams:
9608 self.op.beparams[constants.BE_MAXMEM] = value
9609 if constants.BE_MINMEM not in self.op.beparams:
9610 self.op.beparams[constants.BE_MINMEM] = value
9612 # try to read the parameters old style, from the main section
9613 for name in constants.BES_PARAMETERS:
9614 if (name not in self.op.beparams and
9615 einfo.has_option(constants.INISECT_INS, name)):
9616 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9618 if einfo.has_section(constants.INISECT_OSP):
9619 # use the parameters, without overriding
9620 for name, value in einfo.items(constants.INISECT_OSP):
9621 if name not in self.op.osparams:
9622 self.op.osparams[name] = value
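  # Illustrative sketch (plain dicts instead of the real opcode/export
  # objects): how export-provided values act as defaults only, and how the
  # legacy "memory" backend parameter fans out to maxmem/minmem when those are
  # not set. The function name and literal keys are examples only.
  def _ExampleMergeBeParams(opcode_bep, export_bep):
    """Fill opcode beparams from export values without overriding them."""
    merged = dict(opcode_bep)
    for name, value in export_bep.items():
      if name not in merged:
        merged[name] = value
      if name == "memory":
        merged.setdefault("maxmem", value)
        merged.setdefault("minmem", value)
    return merged

  # Example: the user-specified maxmem wins, minmem falls back to the old
  # export value:
  #   _ExampleMergeBeParams({"maxmem": 512}, {"memory": 256})
  #     -> {"maxmem": 512, "memory": 256, "minmem": 256}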
9624 def _RevertToDefaults(self, cluster):
9625 """Revert the instance parameters to the default values.
9629 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9630 for name in self.op.hvparams.keys():
9631 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9632 del self.op.hvparams[name]
9634 be_defs = cluster.SimpleFillBE({})
9635 for name in self.op.beparams.keys():
9636 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9637 del self.op.beparams[name]
9639 nic_defs = cluster.SimpleFillNIC({})
9640 for nic in self.op.nics:
9641 for name in constants.NICS_PARAMETERS:
9642 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9645 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9646 for name in self.op.osparams.keys():
9647 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9648 del self.op.osparams[name]
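  # Illustrative counterpart of _RevertToDefaults on plain dicts: values equal
  # to the filled cluster defaults are dropped, so only genuine overrides
  # remain. A minimal sketch with a hypothetical helper name.
  def _ExampleStripDefaults(params, defaults):
    """Return a copy of params without entries equal to the defaults."""
    return dict((name, value) for name, value in params.items()
                if defaults.get(name) != value)

  # Example: _ExampleStripDefaults({"maxmem": 128, "vcpus": 4}, {"maxmem": 128})
  #   -> {"vcpus": 4}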
9650 def _CalculateFileStorageDir(self):
9651 """Calculate final instance file storage dir.
9654 # file storage dir calculation/check
9655 self.instance_file_storage_dir = None
9656 if self.op.disk_template in constants.DTS_FILEBASED:
9657 # build the full file storage dir path
9660 if self.op.disk_template == constants.DT_SHARED_FILE:
9661 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9663 get_fsd_fn = self.cfg.GetFileStorageDir
9665 cfg_storagedir = get_fsd_fn()
9666 if not cfg_storagedir:
9667 raise errors.OpPrereqError("Cluster file storage dir not defined")
9668 joinargs.append(cfg_storagedir)
9670 if self.op.file_storage_dir is not None:
9671 joinargs.append(self.op.file_storage_dir)
9673 joinargs.append(self.op.instance_name)
9675 # pylint: disable=W0142
9676 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
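  # Illustrative sketch of the path computation above, using the standard
  # os.path module instead of utils.PathJoin: the cluster-wide storage
  # directory, an optional user-supplied subdirectory and the instance name
  # are simply joined. The argument names and example paths are hypothetical.
  def _ExampleFileStorageDir(cluster_dir, user_subdir, instance_name):
    """Compute a file-storage directory the same way as above."""
    import os.path  # local import only to keep the sketch self-contained
    parts = [cluster_dir]
    if user_subdir is not None:
      parts.append(user_subdir)
    parts.append(instance_name)
    return os.path.join(*parts)

  # Example: _ExampleFileStorageDir("/srv/ganeti/file-storage", None, "inst1")
  #   -> "/srv/ganeti/file-storage/inst1"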
9678 def CheckPrereq(self): # pylint: disable=R0914
9679 """Check prerequisites.
9682 self._CalculateFileStorageDir()
9684 if self.op.mode == constants.INSTANCE_IMPORT:
9685 export_info = self._ReadExportInfo()
9686 self._ReadExportParams(export_info)
9687 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9689 self._old_instance_name = None
9691 if (not self.cfg.GetVGName() and
9692 self.op.disk_template not in constants.DTS_NOT_LVM):
9693 raise errors.OpPrereqError("Cluster does not support lvm-based"
9694 " instances", errors.ECODE_STATE)
9696 if (self.op.hypervisor is None or
9697 self.op.hypervisor == constants.VALUE_AUTO):
9698 self.op.hypervisor = self.cfg.GetHypervisorType()
9700 cluster = self.cfg.GetClusterInfo()
9701 enabled_hvs = cluster.enabled_hypervisors
9702 if self.op.hypervisor not in enabled_hvs:
9703 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9704 " cluster (%s)" % (self.op.hypervisor,
9705 ",".join(enabled_hvs)),
9708 # Check tag validity
9709 for tag in self.op.tags:
9710 objects.TaggableObject.ValidateTag(tag)
9712 # check hypervisor parameter syntax (locally)
9713 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9714 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9716 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9717 hv_type.CheckParameterSyntax(filled_hvp)
9718 self.hv_full = filled_hvp
9719 # check that we don't specify global parameters on an instance
9720 _CheckGlobalHvParams(self.op.hvparams)
9722 # fill and remember the beparams dict
9723 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9724 for param, value in self.op.beparams.iteritems():
9725 if value == constants.VALUE_AUTO:
9726 self.op.beparams[param] = default_beparams[param]
9727 objects.UpgradeBeParams(self.op.beparams)
9728 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9729 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9731 # build os parameters
9732 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9734 # now that hvp/bep are in final format, let's reset to defaults,
9736 if self.op.identify_defaults:
9737 self._RevertToDefaults(cluster)
9741 for idx, nic in enumerate(self.op.nics):
9742 nic_mode_req = nic.get(constants.INIC_MODE, None)
9743 nic_mode = nic_mode_req
9744 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9745 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9747 # in routed mode, for the first nic, the default ip is 'auto'
9748 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9749 default_ip_mode = constants.VALUE_AUTO
9751 default_ip_mode = constants.VALUE_NONE
9753 # ip validity checks
9754 ip = nic.get(constants.INIC_IP, default_ip_mode)
9755 if ip is None or ip.lower() == constants.VALUE_NONE:
9757 elif ip.lower() == constants.VALUE_AUTO:
9758 if not self.op.name_check:
9759 raise errors.OpPrereqError("IP address set to auto but name checks"
9760 " have been skipped",
9762 nic_ip = self.hostname1.ip
9764 if not netutils.IPAddress.IsValid(ip):
9765 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9769 # TODO: check the ip address for uniqueness
9770 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9771 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9774 # MAC address verification
9775 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9776 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9777 mac = utils.NormalizeAndValidateMac(mac)
9780 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9781 except errors.ReservationError:
9782 raise errors.OpPrereqError("MAC address %s already in use"
9783 " in cluster" % mac,
9784 errors.ECODE_NOTUNIQUE)
9786 # Build nic parameters
9787 link = nic.get(constants.INIC_LINK, None)
9788 if link == constants.VALUE_AUTO:
9789 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9792 nicparams[constants.NIC_MODE] = nic_mode
9794 nicparams[constants.NIC_LINK] = link
9796 check_params = cluster.SimpleFillNIC(nicparams)
9797 objects.NIC.CheckParameterSyntax(check_params)
9798 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9800 # disk checks/pre-build
9801 default_vg = self.cfg.GetVGName()
9803 for disk in self.op.disks:
9804 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9805 if mode not in constants.DISK_ACCESS_SET:
9806 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9807 mode, errors.ECODE_INVAL)
9808 size = disk.get(constants.IDISK_SIZE, None)
9810 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9813 except (TypeError, ValueError):
9814 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9817 data_vg = disk.get(constants.IDISK_VG, default_vg)
9819 constants.IDISK_SIZE: size,
9820 constants.IDISK_MODE: mode,
9821 constants.IDISK_VG: data_vg,
9823 if constants.IDISK_METAVG in disk:
9824 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9825 if constants.IDISK_ADOPT in disk:
9826 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9827 self.disks.append(new_disk)
9829 if self.op.mode == constants.INSTANCE_IMPORT:
9831 for idx in range(len(self.disks)):
9832 option = "disk%d_dump" % idx
9833 if export_info.has_option(constants.INISECT_INS, option):
9834 # FIXME: are the old os-es, disk sizes, etc. useful?
9835 export_name = export_info.get(constants.INISECT_INS, option)
9836 image = utils.PathJoin(self.op.src_path, export_name)
9837 disk_images.append(image)
9839 disk_images.append(False)
9841 self.src_images = disk_images
9843 if self.op.instance_name == self._old_instance_name:
9844 for idx, nic in enumerate(self.nics):
9845 if nic.mac == constants.VALUE_AUTO:
9846 nic_mac_ini = "nic%d_mac" % idx
9847 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9849 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9851 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9852 if self.op.ip_check:
9853 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9854 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9855 (self.check_ip, self.op.instance_name),
9856 errors.ECODE_NOTUNIQUE)
9858 #### mac address generation
9859     # By generating the MAC address here, both the allocator and the hooks get
9860     # the real final MAC address rather than the 'auto' or 'generate' value.
9861 # There is a race condition between the generation and the instance object
9862 # creation, which means that we know the mac is valid now, but we're not
9863 # sure it will be when we actually add the instance. If things go bad
9864 # adding the instance will abort because of a duplicate mac, and the
9865 # creation job will fail.
9866 for nic in self.nics:
9867 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9868 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9872 if self.op.iallocator is not None:
9873 self._RunAllocator()
9875 # Release all unneeded node locks
9876 _ReleaseLocks(self, locking.LEVEL_NODE,
9877 keep=filter(None, [self.op.pnode, self.op.snode,
9879 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9880 keep=filter(None, [self.op.pnode, self.op.snode,
9883 #### node related checks
9885 # check primary node
9886 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9887 assert self.pnode is not None, \
9888 "Cannot retrieve locked node %s" % self.op.pnode
9890 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9891 pnode.name, errors.ECODE_STATE)
9893 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9894 pnode.name, errors.ECODE_STATE)
9895 if not pnode.vm_capable:
9896 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9897 " '%s'" % pnode.name, errors.ECODE_STATE)
9899 self.secondaries = []
9901 # mirror node verification
9902 if self.op.disk_template in constants.DTS_INT_MIRROR:
9903 if self.op.snode == pnode.name:
9904 raise errors.OpPrereqError("The secondary node cannot be the"
9905 " primary node", errors.ECODE_INVAL)
9906 _CheckNodeOnline(self, self.op.snode)
9907 _CheckNodeNotDrained(self, self.op.snode)
9908 _CheckNodeVmCapable(self, self.op.snode)
9909 self.secondaries.append(self.op.snode)
9911 snode = self.cfg.GetNodeInfo(self.op.snode)
9912 if pnode.group != snode.group:
9913 self.LogWarning("The primary and secondary nodes are in two"
9914 " different node groups; the disk parameters"
9915 " from the first disk's node group will be"
9918 nodenames = [pnode.name] + self.secondaries
9920 # Verify instance specs
9921 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9923 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9924 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9925 constants.ISPEC_DISK_COUNT: len(self.disks),
9926 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9927 constants.ISPEC_NIC_COUNT: len(self.nics),
9928 constants.ISPEC_SPINDLE_USE: spindle_use,
9931 group_info = self.cfg.GetNodeGroup(pnode.group)
9932 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9933 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9934 if not self.op.ignore_ipolicy and res:
9935 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9936 " policy: %s") % (pnode.group,
9937 utils.CommaJoin(res)),
9940 if not self.adopt_disks:
9941 if self.op.disk_template == constants.DT_RBD:
9942 # _CheckRADOSFreeSpace() is just a placeholder.
9943 # Any function that checks prerequisites can be placed here.
9944 # Check if there is enough space on the RADOS cluster.
9945 _CheckRADOSFreeSpace()
9947 # Check lv size requirements, if not adopting
9948 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9949 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9951 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9952 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9953 disk[constants.IDISK_ADOPT])
9954 for disk in self.disks])
9955 if len(all_lvs) != len(self.disks):
9956 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9958 for lv_name in all_lvs:
9960 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9961 # to ReserveLV uses the same syntax
9962 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9963 except errors.ReservationError:
9964 raise errors.OpPrereqError("LV named %s used by another instance" %
9965 lv_name, errors.ECODE_NOTUNIQUE)
9967 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9968 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9970 node_lvs = self.rpc.call_lv_list([pnode.name],
9971 vg_names.payload.keys())[pnode.name]
9972 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9973 node_lvs = node_lvs.payload
9975 delta = all_lvs.difference(node_lvs.keys())
9977 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9978 utils.CommaJoin(delta),
9980 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9982 raise errors.OpPrereqError("Online logical volumes found, cannot"
9983 " adopt: %s" % utils.CommaJoin(online_lvs),
9985 # update the size of disk based on what is found
9986 for dsk in self.disks:
9987 dsk[constants.IDISK_SIZE] = \
9988 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9989 dsk[constants.IDISK_ADOPT])][0]))
9991 elif self.op.disk_template == constants.DT_BLOCK:
9992 # Normalize and de-duplicate device paths
9993 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9994 for disk in self.disks])
9995 if len(all_disks) != len(self.disks):
9996 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9998 baddisks = [d for d in all_disks
9999 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10001 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10002 " cannot be adopted" %
10003 (", ".join(baddisks),
10004 constants.ADOPTABLE_BLOCKDEV_ROOT),
10005 errors.ECODE_INVAL)
10007 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10008 list(all_disks))[pnode.name]
10009 node_disks.Raise("Cannot get block device information from node %s" %
10011 node_disks = node_disks.payload
10012 delta = all_disks.difference(node_disks.keys())
10014 raise errors.OpPrereqError("Missing block device(s): %s" %
10015 utils.CommaJoin(delta),
10016 errors.ECODE_INVAL)
10017 for dsk in self.disks:
10018 dsk[constants.IDISK_SIZE] = \
10019 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10021 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10023 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10024 # check OS parameters (remotely)
10025 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10027 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10029 # memory check on primary node
10030 #TODO(dynmem): use MINMEM for checking
10032 _CheckNodeFreeMemory(self, self.pnode.name,
10033 "creating instance %s" % self.op.instance_name,
10034 self.be_full[constants.BE_MAXMEM],
10035 self.op.hypervisor)
10037 self.dry_run_result = list(nodenames)
10039 def Exec(self, feedback_fn):
10040 """Create and add the instance to the cluster.
10043 instance = self.op.instance_name
10044 pnode_name = self.pnode.name
10046 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10047 self.owned_locks(locking.LEVEL_NODE)), \
10048 "Node locks differ from node resource locks"
10050 ht_kind = self.op.hypervisor
10051 if ht_kind in constants.HTS_REQ_PORT:
10052 network_port = self.cfg.AllocatePort()
10054 network_port = None
10056     # This is ugly, but we have a chicken-and-egg problem here.
10057 # We can only take the group disk parameters, as the instance
10058 # has no disks yet (we are generating them right here).
10059 node = self.cfg.GetNodeInfo(pnode_name)
10060 nodegroup = self.cfg.GetNodeGroup(node.group)
10061 disks = _GenerateDiskTemplate(self,
10062 self.op.disk_template,
10063 instance, pnode_name,
10066 self.instance_file_storage_dir,
10067 self.op.file_driver,
10070 self.cfg.GetGroupDiskParams(nodegroup))
10072 iobj = objects.Instance(name=instance, os=self.op.os_type,
10073 primary_node=pnode_name,
10074 nics=self.nics, disks=disks,
10075 disk_template=self.op.disk_template,
10076 admin_state=constants.ADMINST_DOWN,
10077 network_port=network_port,
10078 beparams=self.op.beparams,
10079 hvparams=self.op.hvparams,
10080 hypervisor=self.op.hypervisor,
10081 osparams=self.op.osparams,
10085 for tag in self.op.tags:
10088 if self.adopt_disks:
10089 if self.op.disk_template == constants.DT_PLAIN:
10090 # rename LVs to the newly-generated names; we need to construct
10091 # 'fake' LV disks with the old data, plus the new unique_id
10092 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10094 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10095 rename_to.append(t_dsk.logical_id)
10096 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10097 self.cfg.SetDiskID(t_dsk, pnode_name)
10098 result = self.rpc.call_blockdev_rename(pnode_name,
10099 zip(tmp_disks, rename_to))
10100         result.Raise("Failed to rename adopted LVs")
10102 feedback_fn("* creating instance disks...")
10104 _CreateDisks(self, iobj)
10105 except errors.OpExecError:
10106 self.LogWarning("Device creation failed, reverting...")
10108 _RemoveDisks(self, iobj)
10110 self.cfg.ReleaseDRBDMinors(instance)
10113 feedback_fn("adding instance %s to cluster config" % instance)
10115 self.cfg.AddInstance(iobj, self.proc.GetECId())
10117 # Declare that we don't want to remove the instance lock anymore, as we've
10118 # added the instance to the config
10119 del self.remove_locks[locking.LEVEL_INSTANCE]
10121 if self.op.mode == constants.INSTANCE_IMPORT:
10122 # Release unused nodes
10123 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10125 # Release all nodes
10126 _ReleaseLocks(self, locking.LEVEL_NODE)
10129 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10130 feedback_fn("* wiping instance disks...")
10132 _WipeDisks(self, iobj)
10133 except errors.OpExecError, err:
10134 logging.exception("Wiping disks failed")
10135 self.LogWarning("Wiping instance disks failed (%s)", err)
10139 # Something is already wrong with the disks, don't do anything else
10141 elif self.op.wait_for_sync:
10142 disk_abort = not _WaitForSync(self, iobj)
10143 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10144 # make sure the disks are not degraded (still sync-ing is ok)
10145 feedback_fn("* checking mirrors status")
10146 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10151 _RemoveDisks(self, iobj)
10152 self.cfg.RemoveInstance(iobj.name)
10153 # Make sure the instance lock gets removed
10154 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10155 raise errors.OpExecError("There are some degraded disks for"
10158 # Release all node resource locks
10159 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10161 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10162 # we need to set the disks ID to the primary node, since the
10163       # preceding code might or might not have done it, depending on
10164 # disk template and other options
10165 for disk in iobj.disks:
10166 self.cfg.SetDiskID(disk, pnode_name)
10167 if self.op.mode == constants.INSTANCE_CREATE:
10168 if not self.op.no_install:
10169 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10170 not self.op.wait_for_sync)
10172 feedback_fn("* pausing disk sync to install instance OS")
10173 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10176 for idx, success in enumerate(result.payload):
10178 logging.warn("pause-sync of instance %s for disk %d failed",
10181 feedback_fn("* running the instance OS create scripts...")
10182 # FIXME: pass debug option from opcode to backend
10184 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10185 self.op.debug_level)
10187 feedback_fn("* resuming disk sync")
10188 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10191 for idx, success in enumerate(result.payload):
10193 logging.warn("resume-sync of instance %s for disk %d failed",
10196 os_add_result.Raise("Could not add os for instance %s"
10197 " on node %s" % (instance, pnode_name))
10200 if self.op.mode == constants.INSTANCE_IMPORT:
10201 feedback_fn("* running the instance OS import scripts...")
10205 for idx, image in enumerate(self.src_images):
10209 # FIXME: pass debug option from opcode to backend
10210 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10211 constants.IEIO_FILE, (image, ),
10212 constants.IEIO_SCRIPT,
10213 (iobj.disks[idx], idx),
10215 transfers.append(dt)
10218 masterd.instance.TransferInstanceData(self, feedback_fn,
10219 self.op.src_node, pnode_name,
10220 self.pnode.secondary_ip,
10222 if not compat.all(import_result):
10223 self.LogWarning("Some disks for instance %s on node %s were not"
10224 " imported successfully" % (instance, pnode_name))
10226 rename_from = self._old_instance_name
10228 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10229 feedback_fn("* preparing remote import...")
10230 # The source cluster will stop the instance before attempting to make
10231 # a connection. In some cases stopping an instance can take a long
10232 # time, hence the shutdown timeout is added to the connection
10234 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10235 self.op.source_shutdown_timeout)
10236 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10238 assert iobj.primary_node == self.pnode.name
10240 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10241 self.source_x509_ca,
10242 self._cds, timeouts)
10243 if not compat.all(disk_results):
10244 # TODO: Should the instance still be started, even if some disks
10245 # failed to import (valid for local imports, too)?
10246 self.LogWarning("Some disks for instance %s on node %s were not"
10247 " imported successfully" % (instance, pnode_name))
10249 rename_from = self.source_instance_name
10252 # also checked in the prereq part
10253 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10256 # Run rename script on newly imported instance
10257 assert iobj.name == instance
10258 feedback_fn("Running rename script for %s" % instance)
10259 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10261 self.op.debug_level)
10262 if result.fail_msg:
10263 self.LogWarning("Failed to run rename script for %s on node"
10264 " %s: %s" % (instance, pnode_name, result.fail_msg))
10266 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10269 iobj.admin_state = constants.ADMINST_UP
10270 self.cfg.Update(iobj, feedback_fn)
10271 logging.info("Starting instance %s on node %s", instance, pnode_name)
10272 feedback_fn("* starting instance...")
10273 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10275 result.Raise("Could not start instance")
10277 return list(iobj.all_nodes)
10280 def _CheckRADOSFreeSpace():
10281 """Compute disk size requirements inside the RADOS cluster.
10284 # For the RADOS cluster we assume there is always enough space.
10288 class LUInstanceConsole(NoHooksLU):
10289 """Connect to an instance's console.
10291 This is somewhat special in that it returns the command line that
10292 you need to run on the master node in order to connect to the
10298 def ExpandNames(self):
10299 self.share_locks = _ShareAll()
10300 self._ExpandAndLockInstance()
10302 def CheckPrereq(self):
10303 """Check prerequisites.
10305 This checks that the instance is in the cluster.
10308 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10309 assert self.instance is not None, \
10310 "Cannot retrieve locked instance %s" % self.op.instance_name
10311 _CheckNodeOnline(self, self.instance.primary_node)
10313 def Exec(self, feedback_fn):
10314 """Connect to the console of an instance
10317 instance = self.instance
10318 node = instance.primary_node
10320 node_insts = self.rpc.call_instance_list([node],
10321 [instance.hypervisor])[node]
10322 node_insts.Raise("Can't get node information from %s" % node)
10324 if instance.name not in node_insts.payload:
10325 if instance.admin_state == constants.ADMINST_UP:
10326 state = constants.INSTST_ERRORDOWN
10327 elif instance.admin_state == constants.ADMINST_DOWN:
10328 state = constants.INSTST_ADMINDOWN
10330 state = constants.INSTST_ADMINOFFLINE
10331 raise errors.OpExecError("Instance %s is not running (state %s)" %
10332 (instance.name, state))
10334 logging.debug("Connecting to console of %s on %s", instance.name, node)
10336 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10339 def _GetInstanceConsole(cluster, instance):
10340 """Returns console information for an instance.
10342 @type cluster: L{objects.Cluster}
10343 @type instance: L{objects.Instance}
10347 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10348 # beparams and hvparams are passed separately, to avoid editing the
10349 # instance and then saving the defaults in the instance itself.
10350 hvparams = cluster.FillHV(instance)
10351 beparams = cluster.FillBE(instance)
10352 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10354 assert console.instance == instance.name
10355 assert console.Validate()
10357 return console.ToDict()
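# Illustrative sketch of the fill pattern used above: cluster-level defaults
# are overlaid with the instance's own overrides in a fresh dict, so neither
# the cluster nor the instance object is modified. Plain dicts and a
# hypothetical helper name; the parameter names in the example are just
# placeholders.
def _ExampleFillParams(cluster_defaults, instance_overrides):
  """Return cluster defaults overridden by instance-specific values."""
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  return filled

# Example:
#   _ExampleFillParams({"serial_console": True, "kernel_path": "/boot/vmlinuz"},
#                      {"serial_console": False})
#     -> {"serial_console": False, "kernel_path": "/boot/vmlinuz"}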
10360 class LUInstanceReplaceDisks(LogicalUnit):
10361 """Replace the disks of an instance.
10364 HPATH = "mirrors-replace"
10365 HTYPE = constants.HTYPE_INSTANCE
10368 def CheckArguments(self):
10369 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10370 self.op.iallocator)
10372 def ExpandNames(self):
10373 self._ExpandAndLockInstance()
10375 assert locking.LEVEL_NODE not in self.needed_locks
10376 assert locking.LEVEL_NODE_RES not in self.needed_locks
10377 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10379 assert self.op.iallocator is None or self.op.remote_node is None, \
10380 "Conflicting options"
10382 if self.op.remote_node is not None:
10383 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10385 # Warning: do not remove the locking of the new secondary here
10386 # unless DRBD8.AddChildren is changed to work in parallel;
10387 # currently it doesn't since parallel invocations of
10388 # FindUnusedMinor will conflict
10389 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10390 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10392 self.needed_locks[locking.LEVEL_NODE] = []
10393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10395 if self.op.iallocator is not None:
10396 # iallocator will select a new node in the same group
10397 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10399 self.needed_locks[locking.LEVEL_NODE_RES] = []
10401 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10402 self.op.iallocator, self.op.remote_node,
10403 self.op.disks, False, self.op.early_release,
10404 self.op.ignore_ipolicy)
10406 self.tasklets = [self.replacer]
10408 def DeclareLocks(self, level):
10409 if level == locking.LEVEL_NODEGROUP:
10410 assert self.op.remote_node is None
10411 assert self.op.iallocator is not None
10412 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10414 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10415 # Lock all groups used by instance optimistically; this requires going
10416 # via the node before it's locked, requiring verification later on
10417 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10418 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10420 elif level == locking.LEVEL_NODE:
10421 if self.op.iallocator is not None:
10422 assert self.op.remote_node is None
10423 assert not self.needed_locks[locking.LEVEL_NODE]
10425 # Lock member nodes of all locked groups
10426 self.needed_locks[locking.LEVEL_NODE] = [node_name
10427 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10428 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10430 self._LockInstancesNodes()
10431 elif level == locking.LEVEL_NODE_RES:
10433 self.needed_locks[locking.LEVEL_NODE_RES] = \
10434 self.needed_locks[locking.LEVEL_NODE]
10436 def BuildHooksEnv(self):
10437 """Build hooks env.
10439 This runs on the master, the primary and all the secondaries.
10442 instance = self.replacer.instance
10444 "MODE": self.op.mode,
10445 "NEW_SECONDARY": self.op.remote_node,
10446 "OLD_SECONDARY": instance.secondary_nodes[0],
10448 env.update(_BuildInstanceHookEnvByObject(self, instance))
10451 def BuildHooksNodes(self):
10452 """Build hooks nodes.
10455 instance = self.replacer.instance
10457 self.cfg.GetMasterNode(),
10458 instance.primary_node,
10460 if self.op.remote_node is not None:
10461 nl.append(self.op.remote_node)
10464 def CheckPrereq(self):
10465 """Check prerequisites.
10468 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10469 self.op.iallocator is None)
10471 # Verify if node group locks are still correct
10472 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10474 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10476 return LogicalUnit.CheckPrereq(self)
10479 class TLReplaceDisks(Tasklet):
10480 """Replaces disks for an instance.
10482 Note: Locking is not within the scope of this class.
10485 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10486 disks, delay_iallocator, early_release, ignore_ipolicy):
10487 """Initializes this class.
10490 Tasklet.__init__(self, lu)
10493 self.instance_name = instance_name
10495 self.iallocator_name = iallocator_name
10496 self.remote_node = remote_node
10498 self.delay_iallocator = delay_iallocator
10499 self.early_release = early_release
10500 self.ignore_ipolicy = ignore_ipolicy
10503 self.instance = None
10504 self.new_node = None
10505 self.target_node = None
10506 self.other_node = None
10507 self.remote_node_info = None
10508 self.node_secondary_ip = None
10511 def CheckArguments(mode, remote_node, iallocator):
10512 """Helper function for users of this class.
10515 # check for valid parameter combination
10516 if mode == constants.REPLACE_DISK_CHG:
10517 if remote_node is None and iallocator is None:
10518 raise errors.OpPrereqError("When changing the secondary either an"
10519 " iallocator script must be used or the"
10520 " new node given", errors.ECODE_INVAL)
10522 if remote_node is not None and iallocator is not None:
10523 raise errors.OpPrereqError("Give either the iallocator or the new"
10524 " secondary, not both", errors.ECODE_INVAL)
10526 elif remote_node is not None or iallocator is not None:
10527 # Not replacing the secondary
10528 raise errors.OpPrereqError("The iallocator and new node options can"
10529 " only be used when changing the"
10530 " secondary node", errors.ECODE_INVAL)
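  # Illustrative sketch (plain booleans instead of opcode fields) of the
  # argument matrix enforced above: only the "change secondary" mode takes a
  # node source, and it takes exactly one of the two. Hypothetical helper name.
  def _ExampleReplaceArgsValid(changing_secondary, has_remote_node,
                               has_iallocator):
    """Return True iff the remote_node/iallocator combination is acceptable."""
    if changing_secondary:
      # exactly one source for the new secondary node
      return has_remote_node != has_iallocator
    # other modes must not specify either
    return not (has_remote_node or has_iallocator)

  # Examples:
  #   _ExampleReplaceArgsValid(True, True, False)   -> True
  #   _ExampleReplaceArgsValid(True, True, True)    -> False
  #   _ExampleReplaceArgsValid(False, False, False) -> True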
10533 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10534 """Compute a new secondary node using an IAllocator.
10537 ial = IAllocator(lu.cfg, lu.rpc,
10538 mode=constants.IALLOCATOR_MODE_RELOC,
10539 name=instance_name,
10540 relocate_from=list(relocate_from))
10542 ial.Run(iallocator_name)
10544 if not ial.success:
10545 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10546 " %s" % (iallocator_name, ial.info),
10547 errors.ECODE_NORES)
10549 if len(ial.result) != ial.required_nodes:
10550 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10551 " of nodes (%s), required %s" %
10553 len(ial.result), ial.required_nodes),
10554 errors.ECODE_FAULT)
10556 remote_node_name = ial.result[0]
10558 lu.LogInfo("Selected new secondary for instance '%s': %s",
10559 instance_name, remote_node_name)
10561 return remote_node_name
10563 def _FindFaultyDisks(self, node_name):
10564 """Wrapper for L{_FindFaultyInstanceDisks}.
10567 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10570 def _CheckDisksActivated(self, instance):
10571 """Checks if the instance disks are activated.
10573 @param instance: The instance to check disks
10574 @return: True if they are activated, False otherwise
10577 nodes = instance.all_nodes
10579 for idx, dev in enumerate(instance.disks):
10581 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10582 self.cfg.SetDiskID(dev, node)
10584 result = _BlockdevFind(self, node, dev, instance)
10588 elif result.fail_msg or not result.payload:
10593 def CheckPrereq(self):
10594 """Check prerequisites.
10596 This checks that the instance is in the cluster.
10599 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10600 assert instance is not None, \
10601 "Cannot retrieve locked instance %s" % self.instance_name
10603 if instance.disk_template != constants.DT_DRBD8:
10604 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10605 " instances", errors.ECODE_INVAL)
10607 if len(instance.secondary_nodes) != 1:
10608 raise errors.OpPrereqError("The instance has a strange layout,"
10609 " expected one secondary but found %d" %
10610 len(instance.secondary_nodes),
10611 errors.ECODE_FAULT)
10613 if not self.delay_iallocator:
10614 self._CheckPrereq2()
10616 def _CheckPrereq2(self):
10617 """Check prerequisites, second part.
10619 This function should always be part of CheckPrereq. It was separated and is
10620 now called from Exec because during node evacuation iallocator was only
10621     called with an unmodified cluster model, not taking planned changes into account.
10625 instance = self.instance
10626 secondary_node = instance.secondary_nodes[0]
10628 if self.iallocator_name is None:
10629 remote_node = self.remote_node
10631 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10632 instance.name, instance.secondary_nodes)
10634 if remote_node is None:
10635 self.remote_node_info = None
10637 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10638 "Remote node '%s' is not locked" % remote_node
10640 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10641 assert self.remote_node_info is not None, \
10642 "Cannot retrieve locked node %s" % remote_node
10644 if remote_node == self.instance.primary_node:
10645 raise errors.OpPrereqError("The specified node is the primary node of"
10646 " the instance", errors.ECODE_INVAL)
10648 if remote_node == secondary_node:
10649 raise errors.OpPrereqError("The specified node is already the"
10650 " secondary node of the instance",
10651 errors.ECODE_INVAL)
10653 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10654 constants.REPLACE_DISK_CHG):
10655 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10656 errors.ECODE_INVAL)
10658 if self.mode == constants.REPLACE_DISK_AUTO:
10659 if not self._CheckDisksActivated(instance):
10660 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10661 " first" % self.instance_name,
10662 errors.ECODE_STATE)
10663 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10664 faulty_secondary = self._FindFaultyDisks(secondary_node)
10666 if faulty_primary and faulty_secondary:
10667 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10668 " one node and can not be repaired"
10669 " automatically" % self.instance_name,
10670 errors.ECODE_STATE)
10673 self.disks = faulty_primary
10674 self.target_node = instance.primary_node
10675 self.other_node = secondary_node
10676 check_nodes = [self.target_node, self.other_node]
10677 elif faulty_secondary:
10678 self.disks = faulty_secondary
10679 self.target_node = secondary_node
10680 self.other_node = instance.primary_node
10681 check_nodes = [self.target_node, self.other_node]
10687 # Non-automatic modes
10688 if self.mode == constants.REPLACE_DISK_PRI:
10689 self.target_node = instance.primary_node
10690 self.other_node = secondary_node
10691 check_nodes = [self.target_node, self.other_node]
10693 elif self.mode == constants.REPLACE_DISK_SEC:
10694 self.target_node = secondary_node
10695 self.other_node = instance.primary_node
10696 check_nodes = [self.target_node, self.other_node]
10698 elif self.mode == constants.REPLACE_DISK_CHG:
10699 self.new_node = remote_node
10700 self.other_node = instance.primary_node
10701 self.target_node = secondary_node
10702 check_nodes = [self.new_node, self.other_node]
10704 _CheckNodeNotDrained(self.lu, remote_node)
10705 _CheckNodeVmCapable(self.lu, remote_node)
10707 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10708 assert old_node_info is not None
10709 if old_node_info.offline and not self.early_release:
10710 # doesn't make sense to delay the release
10711 self.early_release = True
10712 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10713 " early-release mode", secondary_node)
10716 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10719 # If not specified all disks should be replaced
10721 self.disks = range(len(self.instance.disks))
10723     # TODO: This is ugly, but right now we can't distinguish between an internally
10724     # submitted opcode and an external one. We should fix that.
10725 if self.remote_node_info:
10726       # We change the node, let's verify it still meets instance policy
10727 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10730 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10731 ignore=self.ignore_ipolicy)
10733 for node in check_nodes:
10734 _CheckNodeOnline(self.lu, node)
10736 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10739 if node_name is not None)
10741 # Release unneeded node and node resource locks
10742 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10745 # Release any owned node group
10746 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10747 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10749 # Check whether disks are valid
10750 for disk_idx in self.disks:
10751 instance.FindDisk(disk_idx)
10753 # Get secondary node IP addresses
10754 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10755 in self.cfg.GetMultiNodeInfo(touched_nodes))
10757 def Exec(self, feedback_fn):
10758 """Execute disk replacement.
10760 This dispatches the disk replacement to the appropriate handler.
10763 if self.delay_iallocator:
10764 self._CheckPrereq2()
10767 # Verify owned locks before starting operation
10768 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10769 assert set(owned_nodes) == set(self.node_secondary_ip), \
10770 ("Incorrect node locks, owning %s, expected %s" %
10771 (owned_nodes, self.node_secondary_ip.keys()))
10772 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10773 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10775 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10776 assert list(owned_instances) == [self.instance_name], \
10777 "Instance '%s' not locked" % self.instance_name
10779 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10780 "Should not own any node group lock at this point"
10783 feedback_fn("No disks need replacement for instance '%s'" %
10784 self.instance.name)
10787 feedback_fn("Replacing disk(s) %s for instance '%s'" %
10788 (utils.CommaJoin(self.disks), self.instance.name))
10789     feedback_fn("Current primary node: %s" % self.instance.primary_node)
10790     feedback_fn("Current secondary node: %s" %
10791                 utils.CommaJoin(self.instance.secondary_nodes))
10793 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10795 # Activate the instance disks if we're replacing them on a down instance
10797 _StartInstanceDisks(self.lu, self.instance, True)
10800 # Should we replace the secondary node?
10801 if self.new_node is not None:
10802 fn = self._ExecDrbd8Secondary
10804 fn = self._ExecDrbd8DiskOnly
10806 result = fn(feedback_fn)
10808 # Deactivate the instance disks if we're replacing them on a
10811 _SafeShutdownInstanceDisks(self.lu, self.instance)
10813 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10816 # Verify owned locks
10817 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10818 nodes = frozenset(self.node_secondary_ip)
10819 assert ((self.early_release and not owned_nodes) or
10820 (not self.early_release and not (set(owned_nodes) - nodes))), \
10821 ("Not owning the correct locks, early_release=%s, owned=%r,"
10822 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10826 def _CheckVolumeGroup(self, nodes):
10827 self.lu.LogInfo("Checking volume groups")
10829 vgname = self.cfg.GetVGName()
10831 # Make sure volume group exists on all involved nodes
10832 results = self.rpc.call_vg_list(nodes)
10834 raise errors.OpExecError("Can't list volume groups on the nodes")
10837 res = results[node]
10838 res.Raise("Error checking node %s" % node)
10839 if vgname not in res.payload:
10840 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10843 def _CheckDisksExistence(self, nodes):
10844 # Check disk existence
10845 for idx, dev in enumerate(self.instance.disks):
10846 if idx not in self.disks:
10850 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10851 self.cfg.SetDiskID(dev, node)
10853 result = _BlockdevFind(self, node, dev, self.instance)
10855 msg = result.fail_msg
10856 if msg or not result.payload:
10858 msg = "disk not found"
10859 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10862 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10863 for idx, dev in enumerate(self.instance.disks):
10864 if idx not in self.disks:
10867 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10870 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10871 on_primary, ldisk=ldisk):
10872 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10873 " replace disks for instance %s" %
10874 (node_name, self.instance.name))
10876 def _CreateNewStorage(self, node_name):
10877 """Create new storage on the primary or secondary node.
10879 This is only used for same-node replaces, not for changing the
10880 secondary node, hence we don't want to modify the existing disk.
10885 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10886 for idx, dev in enumerate(disks):
10887 if idx not in self.disks:
10890 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10892 self.cfg.SetDiskID(dev, node_name)
10894 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10895 names = _GenerateUniqueNames(self.lu, lv_names)
10897 (data_disk, meta_disk) = dev.children
10898 vg_data = data_disk.logical_id[0]
10899 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10900 logical_id=(vg_data, names[0]),
10901 params=data_disk.params)
10902 vg_meta = meta_disk.logical_id[0]
10903 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10904 logical_id=(vg_meta, names[1]),
10905 params=meta_disk.params)
10907 new_lvs = [lv_data, lv_meta]
10908 old_lvs = [child.Copy() for child in dev.children]
10909 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10911 # we pass force_create=True to force the LVM creation
10912 for new_lv in new_lvs:
10913 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10914 _GetInstanceInfoText(self.instance), False)
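    # Illustrative sketch of the naming scheme above: each replaced disk gets
    # a fresh data/meta LV pair whose names combine a unique prefix with a
    # ".diskN_data"/".diskN_meta" suffix. The uuid module stands in for the
    # cluster's real unique-ID generator; the helper name is hypothetical.
    def _ExampleNewLvNames(disk_index):
      """Return (data_lv_name, meta_lv_name) for one replaced disk."""
      import uuid
      return (str(uuid.uuid4()) + ".disk%d_data" % disk_index,
              str(uuid.uuid4()) + ".disk%d_meta" % disk_index)

    # Example: _ExampleNewLvNames(0)
    #   -> ("<uuid>.disk0_data", "<uuid>.disk0_meta")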
10918 def _CheckDevices(self, node_name, iv_names):
10919 for name, (dev, _, _) in iv_names.iteritems():
10920 self.cfg.SetDiskID(dev, node_name)
10922 result = _BlockdevFind(self, node_name, dev, self.instance)
10924 msg = result.fail_msg
10925 if msg or not result.payload:
10927 msg = "disk not found"
10928 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10931 if result.payload.is_degraded:
10932 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10934 def _RemoveOldStorage(self, node_name, iv_names):
10935 for name, (_, old_lvs, _) in iv_names.iteritems():
10936 self.lu.LogInfo("Remove logical volumes for %s" % name)
10939 self.cfg.SetDiskID(lv, node_name)
10941 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10943 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10944 hint="remove unused LVs manually")
10946 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10947 """Replace a disk on the primary or secondary for DRBD 8.
10949 The algorithm for replace is quite complicated:
10951 1. for each disk to be replaced:
10953 1. create new LVs on the target node with unique names
10954 1. detach old LVs from the drbd device
10955 1. rename old LVs to name_replaced.<time_t>
10956 1. rename new LVs to old LVs
10957 1. attach the new LVs (with the old names now) to the drbd device
10959 1. wait for sync across all devices
10961 1. for each modified disk:
10963       1. remove old LVs (which have the name name_replaced.<time_t>)
10965 Failures are not very well handled.
10970 # Step: check device activation
10971 self.lu.LogStep(1, steps_total, "Check device existence")
10972 self._CheckDisksExistence([self.other_node, self.target_node])
10973 self._CheckVolumeGroup([self.target_node, self.other_node])
10975 # Step: check other node consistency
10976 self.lu.LogStep(2, steps_total, "Check peer consistency")
10977 self._CheckDisksConsistency(self.other_node,
10978 self.other_node == self.instance.primary_node,
10981 # Step: create new storage
10982 self.lu.LogStep(3, steps_total, "Allocate new storage")
10983 iv_names = self._CreateNewStorage(self.target_node)
10985 # Step: for each lv, detach+rename*2+attach
10986 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10987 for dev, old_lvs, new_lvs in iv_names.itervalues():
10988 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10990 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10992 result.Raise("Can't detach drbd from local storage on node"
10993 " %s for device %s" % (self.target_node, dev.iv_name))
10995 #cfg.Update(instance)
10997 # ok, we created the new LVs, so now we know we have the needed
10998 # storage; as such, we proceed on the target node to rename
10999 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11000 # using the assumption that logical_id == physical_id (which in
11001 # turn is the unique_id on that node)
11003 # FIXME(iustin): use a better name for the replaced LVs
11004 temp_suffix = int(time.time())
11005 ren_fn = lambda d, suff: (d.physical_id[0],
11006 d.physical_id[1] + "_replaced-%s" % suff)
11008 # Build the rename list based on what LVs exist on the node
11009 rename_old_to_new = []
11010 for to_ren in old_lvs:
11011 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11012 if not result.fail_msg and result.payload:
11014 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11016 self.lu.LogInfo("Renaming the old LVs on the target node")
11017 result = self.rpc.call_blockdev_rename(self.target_node,
11019 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11021 # Now we rename the new LVs to the old LVs
11022 self.lu.LogInfo("Renaming the new LVs on the target node")
11023 rename_new_to_old = [(new, old.physical_id)
11024 for old, new in zip(old_lvs, new_lvs)]
11025       result = self.rpc.call_blockdev_rename(self.target_node,
11026                                              rename_new_to_old)
11027 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11029 # Intermediate steps of in memory modifications
11030 for old, new in zip(old_lvs, new_lvs):
11031 new.logical_id = old.logical_id
11032 self.cfg.SetDiskID(new, self.target_node)
11034 # We need to modify old_lvs so that removal later removes the
11035       # right LVs, not the newly added ones; note that old_lvs is a
11036       # copy here
11037 for disk in old_lvs:
11038 disk.logical_id = ren_fn(disk, temp_suffix)
11039 self.cfg.SetDiskID(disk, self.target_node)
11041 # Now that the new lvs have the old name, we can add them to the device
11042 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11043 result = self.rpc.call_blockdev_addchildren(self.target_node,
11044 (dev, self.instance), new_lvs)
11045       msg = result.fail_msg
11046       if msg:
11047         for new_lv in new_lvs:
11048           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11049                                                new_lv).fail_msg
11050           if msg2:
11051             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11052                                hint=("cleanup manually the unused logical"
11053                                      " volumes"))
11054 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
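11055     # Step numbering continues via cstep; order depends on early_release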
11056 cstep = itertools.count(5)
11058 if self.early_release:
11059 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11060 self._RemoveOldStorage(self.target_node, iv_names)
11061 # TODO: Check if releasing locks early still makes sense
11062 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11063     else:
11064       # Release all resource locks except those used by the instance
11065       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11066                     keep=self.node_secondary_ip.keys())
11068 # Release all node locks while waiting for sync
11069 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11071 # TODO: Can the instance lock be downgraded here? Take the optional disk
11072 # shutdown in the caller into consideration.
11075 # This can fail as the old devices are degraded and _WaitForSync
11076 # does a combined result over all disks, so we don't check its return value
11077 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11078 _WaitForSync(self.lu, self.instance)
11080 # Check all devices manually
11081 self._CheckDevices(self.instance.primary_node, iv_names)
11083 # Step: remove old storage
11084 if not self.early_release:
11085 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11086 self._RemoveOldStorage(self.target_node, iv_names)
11088 def _ExecDrbd8Secondary(self, feedback_fn):
11089 """Replace the secondary node for DRBD 8.
11091 The algorithm for replace is quite complicated:
11092 - for all disks of the instance:
11093 - create new LVs on the new node with same names
11094 - shutdown the drbd device on the old secondary
11095 - disconnect the drbd network on the primary
11096 - create the drbd device on the new secondary
11097 - network attach the drbd on the primary, using an artifice:
11098 the drbd code for Attach() will connect to the network if it
11099 finds a device which is connected to the good local disks but
11100 not network enabled
11101 - wait for sync across all devices
11102 - remove all disks from the old secondary
11104     Failures are not very well handled.
11106     """
11107     steps_total = 6
11109 pnode = self.instance.primary_node
11111 # Step: check device activation
11112 self.lu.LogStep(1, steps_total, "Check device existence")
11113 self._CheckDisksExistence([self.instance.primary_node])
11114 self._CheckVolumeGroup([self.instance.primary_node])
11116 # Step: check other node consistency
11117 self.lu.LogStep(2, steps_total, "Check peer consistency")
11118 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11120 # Step: create new storage
11121 self.lu.LogStep(3, steps_total, "Allocate new storage")
11122 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11123 for idx, dev in enumerate(disks):
11124 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11125 (self.new_node, idx))
11126 # we pass force_create=True to force LVM creation
11127 for new_lv in dev.children:
11128 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11129 True, _GetInstanceInfoText(self.instance), False)
11131     # Step 4: drbd minors and drbd setup changes
11132 # after this, we must manually remove the drbd minors on both the
11133 # error and the success paths
11134 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11135 minors = self.cfg.AllocateDRBDMinor([self.new_node
11136 for dev in self.instance.disks],
11137 self.instance.name)
11138     logging.debug("Allocated minors %r", minors)
11140     iv_names = {}
11141 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11142 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11143 (self.new_node, idx))
11144 # create new devices on new_node; note that we create two IDs:
11145 # one without port, so the drbd will be activated without
11146 # networking information on the new node at this stage, and one
11147 # with network, for the latter activation in step 4
11148 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11149       if self.instance.primary_node == o_node1:
11150         p_minor = o_minor1
11151       else:
11152         assert self.instance.primary_node == o_node2, "Three-node instance?"
11153         p_minor = o_minor2
11155 new_alone_id = (self.instance.primary_node, self.new_node, None,
11156 p_minor, new_minor, o_secret)
11157 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11158 p_minor, new_minor, o_secret)
11160 iv_names[idx] = (dev, dev.children, new_net_id)
11161       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11162                     new_net_id)
11163       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11164                               logical_id=new_alone_id,
11165                               children=dev.children,
11166                               size=dev.size,
11167                               params={})
11168       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11169                                              self.cfg)
11170       try:
11171         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11172                               anno_new_drbd,
11173                               _GetInstanceInfoText(self.instance), False)
11174 except errors.GenericError:
11175         self.cfg.ReleaseDRBDMinors(self.instance.name)
11176         raise
11178 # We have new devices, shutdown the drbd on the old secondary
11179 for idx, dev in enumerate(self.instance.disks):
11180 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11181 self.cfg.SetDiskID(dev, self.target_node)
11182 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11183 (dev, self.instance)).fail_msg
11184       if msg:
11185         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11186                            " node: %s" % (idx, msg),
11187 hint=("Please cleanup this device manually as"
11188 " soon as possible"))
11190 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11191 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11192 self.instance.disks)[pnode]
11194     msg = result.fail_msg
11195     if msg:
11196 # detaches didn't succeed (unlikely)
11197 self.cfg.ReleaseDRBDMinors(self.instance.name)
11198 raise errors.OpExecError("Can't detach the disks from the network on"
11199 " old node: %s" % (msg,))
11201 # if we managed to detach at least one, we update all the disks of
11202 # the instance to point to the new secondary
11203 self.lu.LogInfo("Updating instance configuration")
11204 for dev, _, new_logical_id in iv_names.itervalues():
11205 dev.logical_id = new_logical_id
11206 self.cfg.SetDiskID(dev, self.instance.primary_node)
11208 self.cfg.Update(self.instance, feedback_fn)
11210 # Release all node locks (the configuration has been updated)
11211 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11213 # and now perform the drbd attach
11214 self.lu.LogInfo("Attaching primary drbds to new secondary"
11215 " (standalone => connected)")
11216     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11217                                             self.new_node],
11218                                            self.node_secondary_ip,
11219                                            (self.instance.disks, self.instance),
11220                                            self.instance.name,
11221                                            False)
11222 for to_node, to_result in result.items():
11223 msg = to_result.fail_msg
11224       if msg:
11225         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11226                            to_node, msg,
11227                            hint=("please do a gnt-instance info to see the"
11228                                  " status of disks"))
11230 cstep = itertools.count(5)
11232 if self.early_release:
11233 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11234 self._RemoveOldStorage(self.target_node, iv_names)
11235 # TODO: Check if releasing locks early still makes sense
11236 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11237     else:
11238       # Release all resource locks except those used by the instance
11239       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11240                     keep=self.node_secondary_ip.keys())
11242 # TODO: Can the instance lock be downgraded here? Take the optional disk
11243 # shutdown in the caller into consideration.
11246 # This can fail as the old devices are degraded and _WaitForSync
11247 # does a combined result over all disks, so we don't check its return value
11248 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11249 _WaitForSync(self.lu, self.instance)
11251 # Check all devices manually
11252 self._CheckDevices(self.instance.primary_node, iv_names)
11254 # Step: remove old storage
11255 if not self.early_release:
11256 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11257 self._RemoveOldStorage(self.target_node, iv_names)
11260 class LURepairNodeStorage(NoHooksLU):
11261   """Repairs the volume group on a node.
11263   """
11264   REQ_BGL = False
11266 def CheckArguments(self):
11267 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11269 storage_type = self.op.storage_type
11271 if (constants.SO_FIX_CONSISTENCY not in
11272 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11273 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11274 " repaired" % storage_type,
11275 errors.ECODE_INVAL)
11277 def ExpandNames(self):
11278 self.needed_locks = {
11279       locking.LEVEL_NODE: [self.op.node_name],
11280       }
11282 def _CheckFaultyDisks(self, instance, node_name):
11283 """Ensure faulty disks abort the opcode or at least warn."""
11284     try:
11285       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11286                                   node_name, True):
11287 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11288 " node '%s'" % (instance.name, node_name),
11289 errors.ECODE_STATE)
11290 except errors.OpPrereqError, err:
11291 if self.op.ignore_consistency:
11292         self.proc.LogWarning(str(err.args[0]))
11293       else:
11294         raise
11296 def CheckPrereq(self):
11297     """Check prerequisites.
11299     """
11300 # Check whether any instance on this node has faulty disks
11301 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11302       if inst.admin_state != constants.ADMINST_UP:
11303         continue
11304 check_nodes = set(inst.all_nodes)
11305 check_nodes.discard(self.op.node_name)
11306 for inst_node_name in check_nodes:
11307 self._CheckFaultyDisks(inst, inst_node_name)
11309 def Exec(self, feedback_fn):
11310 feedback_fn("Repairing storage unit '%s' on %s ..." %
11311 (self.op.name, self.op.node_name))
11313 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11314 result = self.rpc.call_storage_execute(self.op.node_name,
11315                                            self.op.storage_type, st_args,
11316                                            self.op.name,
11317 constants.SO_FIX_CONSISTENCY)
11318 result.Raise("Failed to repair storage unit '%s' on %s" %
11319 (self.op.name, self.op.node_name))
11322 class LUNodeEvacuate(NoHooksLU):
11323   """Evacuates instances off a list of nodes.
11325   """
11326   REQ_BGL = False
11328 _MODE2IALLOCATOR = {
11329 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11330 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11331     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11332     }
11333 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11334 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11335 constants.IALLOCATOR_NEVAC_MODES)
11337 def CheckArguments(self):
11338 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11340 def ExpandNames(self):
11341 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11343 if self.op.remote_node is not None:
11344 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11345 assert self.op.remote_node
11347 if self.op.remote_node == self.op.node_name:
11348 raise errors.OpPrereqError("Can not use evacuated node as a new"
11349 " secondary node", errors.ECODE_INVAL)
11351 if self.op.mode != constants.NODE_EVAC_SEC:
11352 raise errors.OpPrereqError("Without the use of an iallocator only"
11353 " secondary instances can be evacuated",
11354 errors.ECODE_INVAL)
11357 self.share_locks = _ShareAll()
11358 self.needed_locks = {
11359 locking.LEVEL_INSTANCE: [],
11360 locking.LEVEL_NODEGROUP: [],
11361       locking.LEVEL_NODE: [],
11362       }
11364 # Determine nodes (via group) optimistically, needs verification once locks
11365 # have been acquired
11366 self.lock_nodes = self._DetermineNodes()
11368 def _DetermineNodes(self):
11369     """Gets the list of nodes to operate on.
11371     """
11372 if self.op.remote_node is None:
11373 # Iallocator will choose any node(s) in the same group
11374       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11375     else:
11376 group_nodes = frozenset([self.op.remote_node])
11378 # Determine nodes to be locked
11379 return set([self.op.node_name]) | group_nodes
11381 def _DetermineInstances(self):
11382     """Builds list of instances to operate on.
11384     """
11385 assert self.op.mode in constants.NODE_EVAC_MODES
11387 if self.op.mode == constants.NODE_EVAC_PRI:
11388 # Primary instances only
11389 inst_fn = _GetNodePrimaryInstances
11390 assert self.op.remote_node is None, \
11391 "Evacuating primary instances requires iallocator"
11392 elif self.op.mode == constants.NODE_EVAC_SEC:
11393 # Secondary instances only
11394 inst_fn = _GetNodeSecondaryInstances
11396     else:
11397       assert self.op.mode == constants.NODE_EVAC_ALL
11398 inst_fn = _GetNodeInstances
11399       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11400       # instead of a list of instances
11401 raise errors.OpPrereqError("Due to an issue with the iallocator"
11402 " interface it is not possible to evacuate"
11403 " all instances at once; specify explicitly"
11404                                  " whether to evacuate primary or secondary"
11405                                  " instances",
11406 errors.ECODE_INVAL)
11408 return inst_fn(self.cfg, self.op.node_name)
11410 def DeclareLocks(self, level):
11411 if level == locking.LEVEL_INSTANCE:
11412 # Lock instances optimistically, needs verification once node and group
11413 # locks have been acquired
11414 self.needed_locks[locking.LEVEL_INSTANCE] = \
11415 set(i.name for i in self._DetermineInstances())
11417 elif level == locking.LEVEL_NODEGROUP:
11418 # Lock node groups for all potential target nodes optimistically, needs
11419 # verification once nodes have been acquired
11420 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11421 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11423 elif level == locking.LEVEL_NODE:
11424 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11426 def CheckPrereq(self):
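11427     # Verify that the optimistically acquired locks still cover what is needed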
11428 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11429 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11430 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11432 need_nodes = self._DetermineNodes()
11434 if not owned_nodes.issuperset(need_nodes):
11435 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11436                                  " locks were acquired, current nodes are"
11437                                  " '%s', used to be '%s'; retry the"
11438                                  " operation" %
11439                                  (self.op.node_name,
11440 utils.CommaJoin(need_nodes),
11441 utils.CommaJoin(owned_nodes)),
11442 errors.ECODE_STATE)
11444 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11445 if owned_groups != wanted_groups:
11446 raise errors.OpExecError("Node groups changed since locks were acquired,"
11447 " current groups are '%s', used to be '%s';"
11448 " retry the operation" %
11449 (utils.CommaJoin(wanted_groups),
11450 utils.CommaJoin(owned_groups)))
11452 # Determine affected instances
11453 self.instances = self._DetermineInstances()
11454 self.instance_names = [i.name for i in self.instances]
11456 if set(self.instance_names) != owned_instances:
11457 raise errors.OpExecError("Instances on node '%s' changed since locks"
11458 " were acquired, current instances are '%s',"
11459 " used to be '%s'; retry the operation" %
11460 (self.op.node_name,
11461 utils.CommaJoin(self.instance_names),
11462 utils.CommaJoin(owned_instances)))
11464 if self.instance_names:
11465       self.LogInfo("Evacuating instances from node '%s': %s",
11466                    self.op.node_name,
11467                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11468     else:
11469       self.LogInfo("No instances to evacuate from node '%s'",
11470                    self.op.node_name)
11472 if self.op.remote_node is not None:
11473 for i in self.instances:
11474 if i.primary_node == self.op.remote_node:
11475 raise errors.OpPrereqError("Node %s is the primary node of"
11476                                      " instance %s, cannot use it as"
11477                                      " secondary" %
11478 (self.op.remote_node, i.name),
11479 errors.ECODE_INVAL)
11481 def Exec(self, feedback_fn):
11482 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11484 if not self.instance_names:
11485       # No instances to evacuate
11486       jobs = []
11488 elif self.op.iallocator is not None:
11489 # TODO: Implement relocation to other group
11490 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11491 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11492 instances=list(self.instance_names))
11494 ial.Run(self.op.iallocator)
11496 if not ial.success:
11497 raise errors.OpPrereqError("Can't compute node evacuation using"
11498 " iallocator '%s': %s" %
11499 (self.op.iallocator, ial.info),
11500 errors.ECODE_NORES)
11502 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11504 elif self.op.remote_node is not None:
11505       assert self.op.mode == constants.NODE_EVAC_SEC
11506       jobs = [
11507         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11508                                         remote_node=self.op.remote_node,
11509                                         disks=[],
11510                                         mode=constants.REPLACE_DISK_CHG,
11511                                         early_release=self.op.early_release)]
11512         for instance_name in self.instance_names
11513         ]
11515     else:
11516       raise errors.ProgrammerError("No iallocator or remote node")
11518 return ResultWithJobs(jobs)
11521 def _SetOpEarlyRelease(early_release, op):
11522   """Sets C{early_release} flag on opcodes if available.
11524   """
11525   try:
11526 op.early_release = early_release
11527 except AttributeError:
11528     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11530   return op
11533 def _NodeEvacDest(use_nodes, group, nodes):
11534   """Returns group or nodes depending on caller's choice.
11536   """
11537   if use_nodes:
11538     return utils.CommaJoin(nodes)
11539   else:
11540     return group
11543 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11544 """Unpacks the result of change-group and node-evacuate iallocator requests.
11546 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11547 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11549 @type lu: L{LogicalUnit}
11550 @param lu: Logical unit instance
11551 @type alloc_result: tuple/list
11552 @param alloc_result: Result from iallocator
11553 @type early_release: bool
11554 @param early_release: Whether to release locks early if possible
11555 @type use_nodes: bool
11556 @param use_nodes: Whether to display node names instead of groups
11558   """
11559   (moved, failed, jobs) = alloc_result
11561   if failed:
11562 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11563 for (name, reason) in failed)
11564 lu.LogWarning("Unable to evacuate instances %s", failreason)
11565 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11567   if moved:
11568     lu.LogInfo("Instances to be moved: %s",
11569                utils.CommaJoin("%s (to %s)" %
11570                                (name, _NodeEvacDest(use_nodes, group, nodes))
11571                                for (name, group, nodes) in moved))
11573 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11574               map(opcodes.OpCode.LoadOpCode, ops))
11575           for ops in jobs]
11578 class LUInstanceGrowDisk(LogicalUnit):
11579   """Grow a disk of an instance.
11581   """
11582 HPATH = "disk-grow"
11583   HTYPE = constants.HTYPE_INSTANCE
11584   REQ_BGL = False
11586 def ExpandNames(self):
11587 self._ExpandAndLockInstance()
11588 self.needed_locks[locking.LEVEL_NODE] = []
11589 self.needed_locks[locking.LEVEL_NODE_RES] = []
11590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11591 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11593 def DeclareLocks(self, level):
11594 if level == locking.LEVEL_NODE:
11595 self._LockInstancesNodes()
11596 elif level == locking.LEVEL_NODE_RES:
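11597       # Copy the node locks to the resource-lock level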
11598 self.needed_locks[locking.LEVEL_NODE_RES] = \
11599 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11601 def BuildHooksEnv(self):
11602 """Build hooks env.
11604     This runs on the master, the primary and all the secondaries.
11606     """
11607     env = {
11608       "DISK": self.op.disk,
11609 "AMOUNT": self.op.amount,
11610 "ABSOLUTE": self.op.absolute,
11611       }
11612     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11613     return env
11615 def BuildHooksNodes(self):
11616     """Build hooks nodes.
11618     """
11619     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11620     return (nl, nl)
11622 def CheckPrereq(self):
11623 """Check prerequisites.
11625     This checks that the instance is in the cluster.
11627     """
11628 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11629 assert instance is not None, \
11630 "Cannot retrieve locked instance %s" % self.op.instance_name
11631 nodenames = list(instance.all_nodes)
11632 for node in nodenames:
11633 _CheckNodeOnline(self, node)
11635 self.instance = instance
11637 if instance.disk_template not in constants.DTS_GROWABLE:
11638 raise errors.OpPrereqError("Instance's disk layout does not support"
11639 " growing", errors.ECODE_INVAL)
11641 self.disk = instance.FindDisk(self.op.disk)
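11642     # Compute target size and delta (absolute or relative request)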
11643 if self.op.absolute:
11644 self.target = self.op.amount
11645       self.delta = self.target - self.disk.size
11646       if self.delta < 0:
11647 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11648 "current disk size (%s)" %
11649 (utils.FormatUnit(self.target, "h"),
11650 utils.FormatUnit(self.disk.size, "h")),
11651 errors.ECODE_STATE)
11652     else:
11653       self.delta = self.op.amount
11654       self.target = self.disk.size + self.delta
11655       if self.delta < 0:
11656 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11657 utils.FormatUnit(self.delta, "h"),
11658 errors.ECODE_INVAL)
11660 if instance.disk_template not in (constants.DT_FILE,
11661                                       constants.DT_SHARED_FILE,
11662                                       constants.DT_RBD):
11663       # TODO: check the free disk space for file, when that feature will be
11664       # supported
11665 _CheckNodesFreeDiskPerVG(self, nodenames,
11666 self.disk.ComputeGrowth(self.delta))
11668 def Exec(self, feedback_fn):
11669     """Execute disk grow.
11671     """
11672     instance = self.instance
11673     disk = self.disk
11675 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11676 assert (self.owned_locks(locking.LEVEL_NODE) ==
11677 self.owned_locks(locking.LEVEL_NODE_RES))
11679     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11680     if not disks_ok:
11681 raise errors.OpExecError("Cannot activate block device to grow")
11683 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11684 (self.op.disk, instance.name,
11685 utils.FormatUnit(self.delta, "h"),
11686 utils.FormatUnit(self.target, "h")))
11688 # First run all grow ops in dry-run mode
11689 for node in instance.all_nodes:
11690 self.cfg.SetDiskID(disk, node)
11691       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11692                                            True)
11693 result.Raise("Grow request failed to node %s" % node)
11695 # We know that (as far as we can test) operations across different
11696 # nodes will succeed, time to run it for real
11697 for node in instance.all_nodes:
11698 self.cfg.SetDiskID(disk, node)
11699       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11700                                            False)
11701 result.Raise("Grow request failed to node %s" % node)
11703 # TODO: Rewrite code to work properly
11704 # DRBD goes into sync mode for a short amount of time after executing the
11705 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11706 # calling "resize" in sync mode fails. Sleeping for a short amount of
11707     # time is a work-around.
11708     time.sleep(5)
11710 disk.RecordGrow(self.delta)
11711 self.cfg.Update(instance, feedback_fn)
11713 # Changes have been recorded, release node lock
11714 _ReleaseLocks(self, locking.LEVEL_NODE)
11716 # Downgrade lock while waiting for sync
11717 self.glm.downgrade(locking.LEVEL_INSTANCE)
11719 if self.op.wait_for_sync:
11720       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11721       if disk_abort:
11722 self.proc.LogWarning("Disk sync-ing has not returned a good"
11723 " status; please check the instance")
11724 if instance.admin_state != constants.ADMINST_UP:
11725 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11726 elif instance.admin_state != constants.ADMINST_UP:
11727 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11728 " not supposed to be running because no wait for"
11729 " sync mode was requested")
11731 assert self.owned_locks(locking.LEVEL_NODE_RES)
11732 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11735 class LUInstanceQueryData(NoHooksLU):
11736   """Query runtime instance data.
11738   """
11739   REQ_BGL = False
11741 def ExpandNames(self):
11742 self.needed_locks = {}
11744 # Use locking if requested or when non-static information is wanted
11745 if not (self.op.static or self.op.use_locking):
11746 self.LogWarning("Non-static data requested, locks need to be acquired")
11747 self.op.use_locking = True
11749 if self.op.instances or not self.op.use_locking:
11750 # Expand instance names right here
11751 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11752     else:
11753       # Will use acquired locks
11754 self.wanted_names = None
11756 if self.op.use_locking:
11757 self.share_locks = _ShareAll()
11759 if self.wanted_names is None:
11760 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11761       else:
11762         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11764 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11765 self.needed_locks[locking.LEVEL_NODE] = []
11766 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11768 def DeclareLocks(self, level):
11769 if self.op.use_locking:
11770 if level == locking.LEVEL_NODEGROUP:
11771 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11773 # Lock all groups used by instances optimistically; this requires going
11774 # via the node before it's locked, requiring verification later on
11775 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11776 frozenset(group_uuid
11777                     for instance_name in owned_instances
11778                     for group_uuid in
11779 self.cfg.GetInstanceNodeGroups(instance_name))
11781 elif level == locking.LEVEL_NODE:
11782 self._LockInstancesNodes()
11784 def CheckPrereq(self):
11785 """Check prerequisites.
11787     This only checks the optional instance list against the existing names.
11789     """
11790 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11791 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11792 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11794 if self.wanted_names is None:
11795 assert self.op.use_locking, "Locking was not used"
11796 self.wanted_names = owned_instances
11798 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11800 if self.op.use_locking:
11801       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11802                                 None)
11803     else:
11804 assert not (owned_instances or owned_groups or owned_nodes)
11806 self.wanted_instances = instances.values()
11808 def _ComputeBlockdevStatus(self, node, instance, dev):
11809     """Returns the status of a block device
11811     """
11812     if self.op.static or not node:
11813       return None
11815     self.cfg.SetDiskID(dev, node)
11817     result = self.rpc.call_blockdev_find(node, dev)
11818     if result.offline:
11819       return None
11821     result.Raise("Can't compute disk status for %s" % instance.name)
11823     status = result.payload
11824     if status is None:
11825       return None
11827 return (status.dev_path, status.major, status.minor,
11828 status.sync_percent, status.estimated_time,
11829 status.is_degraded, status.ldisk_status)
11831 def _ComputeDiskStatus(self, instance, snode, dev):
11832     """Compute block device status.
11834     """
11835 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11837 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11839 def _ComputeDiskStatusInner(self, instance, snode, dev):
11840 """Compute block device status.
11842     @attention: The device has to be annotated already.
11844     """
11845 if dev.dev_type in constants.LDS_DRBD:
11846 # we change the snode then (otherwise we use the one passed in)
11847 if dev.logical_id[0] == instance.primary_node:
11848 snode = dev.logical_id[1]
11849       else:
11850         snode = dev.logical_id[0]
11852     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11853                                               instance, dev)
11854 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11856     if dev.children:
11857       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11858                                         instance, snode),
11859                          dev.children)
11860     else:
11861       dev_children = []
11863     return {
11864 "iv_name": dev.iv_name,
11865 "dev_type": dev.dev_type,
11866 "logical_id": dev.logical_id,
11867 "physical_id": dev.physical_id,
11868 "pstatus": dev_pstatus,
11869 "sstatus": dev_sstatus,
11870       "children": dev_children,
11871       "mode": dev.mode,
11872       "size": dev.size,
11873       }
11875 def Exec(self, feedback_fn):
11876     """Gather and return data"""
11877     result = {}
11879 cluster = self.cfg.GetClusterInfo()
11881 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11882 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
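11883     # Resolve the node group objects for every node used by the instances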
11884 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11885 for node in nodes.values()))
11887 group2name_fn = lambda uuid: groups[uuid].name
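11888     # Helper mapping a node group UUID to its name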
11889 for instance in self.wanted_instances:
11890 pnode = nodes[instance.primary_node]
11892       if self.op.static or pnode.offline:
11893         remote_state = None
11894         if pnode.offline:
11895           self.LogWarning("Primary node %s is marked offline, returning static"
11896                           " information only for instance %s" %
11897                           (pnode.name, instance.name))
11898       else:
11899         remote_info = self.rpc.call_instance_info(instance.primary_node,
11900                                                   instance.name,
11901                                                   instance.hypervisor)
11902 remote_info.Raise("Error checking node %s" % instance.primary_node)
11903 remote_info = remote_info.payload
11904 if remote_info and "state" in remote_info:
11905 remote_state = "up"
11906         else:
11907           if instance.admin_state == constants.ADMINST_UP:
11908             remote_state = "down"
11909           else:
11910             remote_state = instance.admin_state
11912       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11913                   instance.disks)
11915 snodes_group_uuids = [nodes[snode_name].group
11916 for snode_name in instance.secondary_nodes]
11918 result[instance.name] = {
11919 "name": instance.name,
11920 "config_state": instance.admin_state,
11921 "run_state": remote_state,
11922 "pnode": instance.primary_node,
11923 "pnode_group_uuid": pnode.group,
11924 "pnode_group_name": group2name_fn(pnode.group),
11925 "snodes": instance.secondary_nodes,
11926 "snodes_group_uuids": snodes_group_uuids,
11927         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11928         "os": instance.os,
11929 # this happens to be the same format used for hooks
11930 "nics": _NICListToTuple(self, instance.nics),
11931         "disk_template": instance.disk_template,
11932         "disks": disks,
11933 "hypervisor": instance.hypervisor,
11934 "network_port": instance.network_port,
11935 "hv_instance": instance.hvparams,
11936 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11937 "be_instance": instance.beparams,
11938 "be_actual": cluster.FillBE(instance),
11939 "os_instance": instance.osparams,
11940 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11941 "serial_no": instance.serial_no,
11942 "mtime": instance.mtime,
11943 "ctime": instance.ctime,
11944         "uuid": instance.uuid,
11945         }
11947     return result
11950 def PrepareContainerMods(mods, private_fn):
11951 """Prepares a list of container modifications by adding a private data field.
11953 @type mods: list of tuples; (operation, index, parameters)
11954 @param mods: List of modifications
11955 @type private_fn: callable or None
11956   @param private_fn: Callable for constructing a private data field for a
11957     modification
11958   @rtype: list
11960   """
11961   if private_fn is None:
11962     fn = lambda: None
11963   else:
11964     fn = private_fn
11966 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11969 #: Type description for changes as returned by L{ApplyContainerMods}'s
11970 #: callbacks
11971 _TApplyContModsCbChanges = \
11972 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11973     ht.TNonEmptyString,
11974     ht.TAny,
11975     ])))
11978 def ApplyContainerMods(kind, container, chgdesc, mods,
11979 create_fn, modify_fn, remove_fn):
11980   """Applies descriptions in C{mods} to C{container}.
11982   @type kind: string
11983 @param kind: One-word item description
11984 @type container: list
11985 @param container: Container to modify
11986 @type chgdesc: None or list
11987   @param chgdesc: List of applied changes
11988   @type mods: list
11989 @param mods: Modifications as returned by L{PrepareContainerMods}
11990 @type create_fn: callable
11991 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11992 receives absolute item index, parameters and private data object as added
11993     by L{PrepareContainerMods}, returns tuple containing new item and changes
11994     as list
11995 @type modify_fn: callable
11996 @param modify_fn: Callback for modifying an existing item
11997 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11998     and private data object as added by L{PrepareContainerMods}, returns
11999     changes as list
12000 @type remove_fn: callable
12001 @param remove_fn: Callback on removing item; receives absolute item index,
12002     item and private data object as added by L{PrepareContainerMods}
12004   """
12005 for (op, idx, params, private) in mods:
12006     if idx == -1:
12007       # Append
12008       absidx = len(container) - 1
12009     elif idx < 0:
12010       raise IndexError("Not accepting negative indices other than -1")
12011 elif idx > len(container):
12012 raise IndexError("Got %s index %s, but there are only %s" %
12013                        (kind, idx, len(container)))
12014     else:
12015       absidx = idx
12017     changes = None
12019 if op == constants.DDM_ADD:
12020       # Calculate where item will be added
12021       if idx == -1:
12022         addidx = len(container)
12023       else:
12024         addidx = idx
12026       if create_fn is None:
12027         item = params
12028       else:
12029         (item, changes) = create_fn(addidx, params, private)
12031       if idx == -1:
12032         container.append(item)
12033       else:
12035         assert idx <= len(container)
12036 # list.insert does so before the specified index
12037 container.insert(idx, item)
12038     else:
12039       # Retrieve existing item
12040       try:
12041         item = container[absidx]
12042       except IndexError:
12043         raise IndexError("Invalid %s index %s" % (kind, idx))
12045       if op == constants.DDM_REMOVE:
12046         assert not params
12048 if remove_fn is not None:
12049 remove_fn(absidx, item, private)
12051 changes = [("%s/%s" % (kind, absidx), "remove")]
12053 assert container[absidx] == item
12054 del container[absidx]
12055 elif op == constants.DDM_MODIFY:
12056 if modify_fn is not None:
12057 changes = modify_fn(absidx, item, params, private)
12058       else:
12059         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12061 assert _TApplyContModsCbChanges(changes)
12063 if not (chgdesc is None or changes is None):
12064 chgdesc.extend(changes)
12067 def _UpdateIvNames(base_index, disks):
12068 """Updates the C{iv_name} attribute of disks.
12070   @type disks: list of L{objects.Disk}
12072   """
12073 for (idx, disk) in enumerate(disks):
12074 disk.iv_name = "disk/%s" % (base_index + idx, )
12077 class _InstNicModPrivate:
12078 """Data structure for network interface modifications.
12080   Used by L{LUInstanceSetParams}.
12082   """
12083   def __init__(self):
12084     self.params = None
12085     self.filled = None
12088 class LUInstanceSetParams(LogicalUnit):
12089   """Modifies an instance's parameters.
12091   """
12092 HPATH = "instance-modify"
12093   HTYPE = constants.HTYPE_INSTANCE
12094   REQ_BGL = False
12096   @staticmethod
12097 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12098 assert ht.TList(mods)
12099 assert not mods or len(mods[0]) in (2, 3)
12101     if mods and len(mods[0]) == 2:
12102       result = []
12103       addremove = 0
12105       for op, params in mods:
12106 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12107           result.append((op, -1, params))
12108           addremove += 1
12110           if addremove > 1:
12111             raise errors.OpPrereqError("Only one %s add or remove operation is"
12112                                        " supported at a time" % kind,
12113                                        errors.ECODE_INVAL)
12114         else:
12115           result.append((constants.DDM_MODIFY, op, params))
12117       assert verify_fn(result)
12118     else:
12119       result = mods
12121     return result
12123   @staticmethod
12124 def _CheckMods(kind, mods, key_types, item_fn):
12125     """Ensures requested disk/NIC modifications are valid.
12127     """
12128 for (op, _, params) in mods:
12129 assert ht.TDict(params)
12131 utils.ForceDictType(params, key_types)
12133       if op == constants.DDM_REMOVE:
12134         if params:
12135           raise errors.OpPrereqError("No settings should be passed when"
12136                                      " removing a %s" % kind,
12137                                      errors.ECODE_INVAL)
12138 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12139 item_fn(op, params)
12141 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12143   @staticmethod
12144   def _VerifyDiskModification(op, params):
12145     """Verifies a disk modification.
12147     """
12148 if op == constants.DDM_ADD:
12149 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12150 if mode not in constants.DISK_ACCESS_SET:
12151 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12152 errors.ECODE_INVAL)
12154 size = params.get(constants.IDISK_SIZE, None)
12155       if size is None:
12156         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12157                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12159       try:
12160         size = int(size)
12161       except (TypeError, ValueError), err:
12162 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12163 errors.ECODE_INVAL)
12165 params[constants.IDISK_SIZE] = size
12167 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12168 raise errors.OpPrereqError("Disk size change not possible, use"
12169 " grow-disk", errors.ECODE_INVAL)
12171   @staticmethod
12172   def _VerifyNicModification(op, params):
12173     """Verifies a network interface modification.
12175     """
12176 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12177       ip = params.get(constants.INIC_IP, None)
12178       if ip is None:
12179         pass
12180 elif ip.lower() == constants.VALUE_NONE:
12181 params[constants.INIC_IP] = None
12182 elif not netutils.IPAddress.IsValid(ip):
12183 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12184 errors.ECODE_INVAL)
12186 bridge = params.get("bridge", None)
12187 link = params.get(constants.INIC_LINK, None)
12188 if bridge and link:
12189 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12190 " at the same time", errors.ECODE_INVAL)
12191 elif bridge and bridge.lower() == constants.VALUE_NONE:
12192 params["bridge"] = None
12193 elif link and link.lower() == constants.VALUE_NONE:
12194 params[constants.INIC_LINK] = None
12196 if op == constants.DDM_ADD:
12197 macaddr = params.get(constants.INIC_MAC, None)
12198 if macaddr is None:
12199 params[constants.INIC_MAC] = constants.VALUE_AUTO
12201 if constants.INIC_MAC in params:
12202 macaddr = params[constants.INIC_MAC]
12203 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12204 macaddr = utils.NormalizeAndValidateMac(macaddr)
12206 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12207 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12208 " modifying an existing NIC",
12209 errors.ECODE_INVAL)
12211 def CheckArguments(self):
12212 if not (self.op.nics or self.op.disks or self.op.disk_template or
12213 self.op.hvparams or self.op.beparams or self.op.os_name or
12214 self.op.offline is not None or self.op.runtime_mem):
12215 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12217 if self.op.hvparams:
12218 _CheckGlobalHvParams(self.op.hvparams)
12220     self.op.disks = \
12221       self._UpgradeDiskNicMods("disk", self.op.disks,
12222 opcodes.OpInstanceSetParams.TestDiskModifications)
12223     self.op.nics = \
12224       self._UpgradeDiskNicMods("NIC", self.op.nics,
12225 opcodes.OpInstanceSetParams.TestNicModifications)
12227 # Check disk modifications
12228 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12229 self._VerifyDiskModification)
12231 if self.op.disks and self.op.disk_template is not None:
12232 raise errors.OpPrereqError("Disk template conversion and other disk"
12233 " changes not supported at the same time",
12234 errors.ECODE_INVAL)
12236 if (self.op.disk_template and
12237 self.op.disk_template in constants.DTS_INT_MIRROR and
12238 self.op.remote_node is None):
12239 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12240 " one requires specifying a secondary node",
12241 errors.ECODE_INVAL)
12243 # Check NIC modifications
12244 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12245 self._VerifyNicModification)
12247 def ExpandNames(self):
12248 self._ExpandAndLockInstance()
12249 # Can't even acquire node locks in shared mode as upcoming changes in
12250 # Ganeti 2.6 will start to modify the node object on disk conversion
12251 self.needed_locks[locking.LEVEL_NODE] = []
12252 self.needed_locks[locking.LEVEL_NODE_RES] = []
12253 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12255 def DeclareLocks(self, level):
12256 # TODO: Acquire group lock in shared mode (disk parameters)
12257 if level == locking.LEVEL_NODE:
12258 self._LockInstancesNodes()
12259 if self.op.disk_template and self.op.remote_node:
12260 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12261 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12262 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
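12263       # Copy the node locks to the resource level for the disk conversion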
12264 self.needed_locks[locking.LEVEL_NODE_RES] = \
12265 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12267 def BuildHooksEnv(self):
12268 """Build hooks env.
12270     This runs on the master, primary and secondaries.
12272     """
12273     args = {}
12274 if constants.BE_MINMEM in self.be_new:
12275 args["minmem"] = self.be_new[constants.BE_MINMEM]
12276 if constants.BE_MAXMEM in self.be_new:
12277 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12278 if constants.BE_VCPUS in self.be_new:
12279 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12280 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12281 # information at all.
12283     if self._new_nics is not None:
12284       nics = []
12286 for nic in self._new_nics:
12287 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12288 mode = nicparams[constants.NIC_MODE]
12289 link = nicparams[constants.NIC_LINK]
12290 nics.append((nic.ip, nic.mac, mode, link))
12292 args["nics"] = nics
12294 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12295 if self.op.disk_template:
12296 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12297 if self.op.runtime_mem:
12298       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12300     return env
12302 def BuildHooksNodes(self):
12303     """Build hooks nodes.
12305     """
12306     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12307     return (nl, nl)
12309   def _PrepareNicModification(self, params, private, old_ip, old_params,
12310                               cluster, pnode):
12311 update_params_dict = dict([(key, params[key])
12312                                for key in constants.NICS_PARAMETERS
12313                                if key in params])
12315     if "bridge" in params:
12316 update_params_dict[constants.NIC_LINK] = params["bridge"]
12318 new_params = _GetUpdatedParams(old_params, update_params_dict)
12319 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12321 new_filled_params = cluster.SimpleFillNIC(new_params)
12322 objects.NIC.CheckParameterSyntax(new_filled_params)
12324 new_mode = new_filled_params[constants.NIC_MODE]
12325 if new_mode == constants.NIC_MODE_BRIDGED:
12326 bridge = new_filled_params[constants.NIC_LINK]
12327 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12328       if msg:
12329         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12330         if self.op.force:
12331           self.warn.append(msg)
12332         else:
12333           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12335 elif new_mode == constants.NIC_MODE_ROUTED:
12336 ip = params.get(constants.INIC_IP, old_ip)
12337       if ip is None:
12338         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12339                                    " on a routed NIC", errors.ECODE_INVAL)
12341 if constants.INIC_MAC in params:
12342 mac = params[constants.INIC_MAC]
12343       if mac is None:
12344         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12345                                    errors.ECODE_INVAL)
12346 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12347 # otherwise generate the MAC address
12348 params[constants.INIC_MAC] = \
12349 self.cfg.GenerateMAC(self.proc.GetECId())
12350       else:
12351         # or validate/reserve the current one
12352         try:
12353           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12354 except errors.ReservationError:
12355 raise errors.OpPrereqError("MAC address '%s' already in use"
12356 " in cluster" % mac,
12357 errors.ECODE_NOTUNIQUE)
12359 private.params = new_params
12360 private.filled = new_filled_params
12362 def CheckPrereq(self):
12363 """Check prerequisites.
12365     This only checks the instance list against the existing names.
12367     """
12368 # checking the new params on the primary/secondary nodes
12370 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12371 cluster = self.cluster = self.cfg.GetClusterInfo()
12372 assert self.instance is not None, \
12373 "Cannot retrieve locked instance %s" % self.op.instance_name
12374 pnode = instance.primary_node
12375 nodelist = list(instance.all_nodes)
12376 pnode_info = self.cfg.GetNodeInfo(pnode)
12377 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12379 # Prepare disk/NIC modifications
12380 self.diskmod = PrepareContainerMods(self.op.disks, None)
12381 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
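12383     # OS change: verify the new OS unless --force was given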
12384 if self.op.os_name and not self.op.force:
12385 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12386 self.op.force_variant)
12387 instance_os = self.op.os_name
12388     else:
12389       instance_os = instance.os
12391 assert not (self.op.disk_template and self.op.disks), \
12392 "Can't modify disk template and apply disk changes at the same time"
12394 if self.op.disk_template:
12395 if instance.disk_template == self.op.disk_template:
12396 raise errors.OpPrereqError("Instance already has disk template %s" %
12397 instance.disk_template, errors.ECODE_INVAL)
12399 if (instance.disk_template,
12400 self.op.disk_template) not in self._DISK_CONVERSIONS:
12401 raise errors.OpPrereqError("Unsupported disk template conversion from"
12402 " %s to %s" % (instance.disk_template,
12403 self.op.disk_template),
12404 errors.ECODE_INVAL)
12405 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12406 msg="cannot change disk template")
12407 if self.op.disk_template in constants.DTS_INT_MIRROR:
12408 if self.op.remote_node == pnode:
12409 raise errors.OpPrereqError("Given new secondary node %s is the same"
12410 " as the primary node of the instance" %
12411 self.op.remote_node, errors.ECODE_STATE)
12412 _CheckNodeOnline(self, self.op.remote_node)
12413 _CheckNodeNotDrained(self, self.op.remote_node)
12414 # FIXME: here we assume that the old instance type is DT_PLAIN
12415 assert instance.disk_template == constants.DT_PLAIN
12416 disks = [{constants.IDISK_SIZE: d.size,
12417 constants.IDISK_VG: d.logical_id[0]}
12418 for d in instance.disks]
12419 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12420 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12422 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12423 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12424 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12425 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12426 ignore=self.op.ignore_ipolicy)
12427 if pnode_info.group != snode_info.group:
12428 self.LogWarning("The primary and secondary nodes are in two"
12429 " different node groups; the disk parameters"
12430                         " from the first disk's node group will be"
12431                         " used")
12433 # hvparams processing
12434 if self.op.hvparams:
12435 hv_type = instance.hypervisor
12436 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12437 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12438 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
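12440       # Local syntax check first; _CheckHVParams then validates on all nodes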
12441 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12442 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12443 self.hv_proposed = self.hv_new = hv_new # the new actual values
12444 self.hv_inst = i_hvdict # the new dict (without defaults)
12445     else:
12446       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12447                                               instance.hvparams)
12448 self.hv_new = self.hv_inst = {}
12450 # beparams processing
12451 if self.op.beparams:
12452       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12453                                    use_none=True)
12454 objects.UpgradeBeParams(i_bedict)
12455 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12456 be_new = cluster.SimpleFillBE(i_bedict)
12457 self.be_proposed = self.be_new = be_new # the new actual values
12458 self.be_inst = i_bedict # the new dict (without defaults)
12459     else:
12460       self.be_new = self.be_inst = {}
12461 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12462 be_old = cluster.FillBE(instance)
12464 # CPU param validation -- checking every time a parameter is
12465     # changed to cover all cases where either CPU mask or vcpus have
12466     # been changed
12467 if (constants.BE_VCPUS in self.be_proposed and
12468 constants.HV_CPU_MASK in self.hv_proposed):
12469       cpu_list = \
12470         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12471 # Verify mask is consistent with number of vCPUs. Can skip this
12472 # test if only 1 entry in the CPU mask, which means same mask
12473 # is applied to all vCPUs.
12474 if (len(cpu_list) > 1 and
12475 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12476         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12477                                    " CPU mask [%s]" %
12478 (self.be_proposed[constants.BE_VCPUS],
12479 self.hv_proposed[constants.HV_CPU_MASK]),
12480 errors.ECODE_INVAL)
12482 # Only perform this test if a new CPU mask is given
12483 if constants.HV_CPU_MASK in self.hv_new:
12484 # Calculate the largest CPU number requested
12485 max_requested_cpu = max(map(max, cpu_list))
12486 # Check that all of the instance's nodes have enough physical CPUs to
12487 # satisfy the requested CPU mask
12488 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12489 max_requested_cpu + 1, instance.hypervisor)
12491 # osparams processing
12492 if self.op.osparams:
12493 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12494 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12495       self.os_inst = i_osdict # the new dict (without defaults)
12496     else:
12497       self.os_inst = {}
12499     self.warn = []
12501 #TODO(dynmem): do the appropriate check involving MINMEM
12502 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12503 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12504 mem_check_list = [pnode]
12505 if be_new[constants.BE_AUTO_BALANCE]:
12506 # either we changed auto_balance to yes or it was from before
12507 mem_check_list.extend(instance.secondary_nodes)
12508 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12509 instance.hypervisor)
12510 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12511 [instance.hypervisor])
12512 pninfo = nodeinfo[pnode]
12513       msg = pninfo.fail_msg
12514       if msg:
12515         # Assume the primary node is unreachable and go ahead
12516         self.warn.append("Can't get info from primary node %s: %s" %
12517                          (pnode, msg))
12518       else:
12519 (_, _, (pnhvinfo, )) = pninfo.payload
12520 if not isinstance(pnhvinfo.get("memory_free", None), int):
12521 self.warn.append("Node data from primary node %s doesn't contain"
12522 " free memory information" % pnode)
12523 elif instance_info.fail_msg:
12524 self.warn.append("Can't get instance runtime information: %s" %
12525 instance_info.fail_msg)
12526         else:
12527           if instance_info.payload:
12528             current_mem = int(instance_info.payload["memory"])
12529           else:
12530             # Assume instance not running
12531             # (there is a slight race condition here, but it's not very
12532             # probable, and we have no other way to check)
12533             # TODO: Describe race condition
12534             current_mem = 0
12535 #TODO(dynmem): do the appropriate check involving MINMEM
12536 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12537                       pnhvinfo["memory_free"])
12538           if miss_mem > 0:
12539 raise errors.OpPrereqError("This change will prevent the instance"
12540 " from starting, due to %d MB of memory"
12541                                        " missing on its primary node" %
12542                                        miss_mem,
12543 errors.ECODE_NORES)
12545 if be_new[constants.BE_AUTO_BALANCE]:
12546 for node, nres in nodeinfo.items():
12547           if node not in instance.secondary_nodes:
12548             continue
12549 nres.Raise("Can't get info from secondary node %s" % node,
12550 prereq=True, ecode=errors.ECODE_STATE)
12551 (_, _, (nhvinfo, )) = nres.payload
12552 if not isinstance(nhvinfo.get("memory_free", None), int):
12553 raise errors.OpPrereqError("Secondary node %s didn't return free"
12554 " memory information" % node,
12555 errors.ECODE_STATE)
12556 #TODO(dynmem): do the appropriate check involving MINMEM
12557 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12558 raise errors.OpPrereqError("This change will prevent the instance"
12559 " from failover to its secondary node"
12560 " %s, due to not enough memory" % node,
12561 errors.ECODE_STATE)
12563 if self.op.runtime_mem:
12564       remote_info = self.rpc.call_instance_info(instance.primary_node,
12565                                                 instance.name,
12566 instance.hypervisor)
12567 remote_info.Raise("Error checking node %s" % instance.primary_node)
12568 if not remote_info.payload: # not running already
12569 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12570 errors.ECODE_STATE)
12572 current_memory = remote_info.payload["memory"]
12573 if (not self.op.force and
12574 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12575 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12576 raise errors.OpPrereqError("Instance %s must have memory between %d"
12577 " and %d MB of memory unless --force is"
12578 " given" % (instance.name,
12579 self.be_proposed[constants.BE_MINMEM],
12580 self.be_proposed[constants.BE_MAXMEM]),
12581 errors.ECODE_INVAL)
12583 if self.op.runtime_mem > current_memory:
12584 _CheckNodeFreeMemory(self, instance.primary_node,
12585                              "ballooning memory for instance %s" %
12586                              instance.name,
12587                              self.op.runtime_mem - current_memory,
12588 instance.hypervisor)
12590 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12591 raise errors.OpPrereqError("Disk operations not supported for"
12592 " diskless instances",
12593 errors.ECODE_INVAL)
12595 def _PrepareNicCreate(_, params, private):
12596 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12597 return (None, None)
12599 def _PrepareNicMod(_, nic, params, private):
12600 self._PrepareNicModification(params, private, nic.ip,
12601                                    nic.nicparams, cluster, pnode)
12602       return None
12604 # Verify NIC changes (operating on copy)
12605 nics = instance.nics[:]
12606 ApplyContainerMods("NIC", nics, None, self.nicmod,
12607 _PrepareNicCreate, _PrepareNicMod, None)
12608 if len(nics) > constants.MAX_NICS:
12609 raise errors.OpPrereqError("Instance has too many network interfaces"
12610 " (%d), cannot add more" % constants.MAX_NICS,
12611 errors.ECODE_STATE)
12613 # Verify disk changes (operating on a copy)
12614 disks = instance.disks[:]
12615 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12616 if len(disks) > constants.MAX_DISKS:
12617 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12618 " more" % constants.MAX_DISKS,
12619 errors.ECODE_STATE)
12621 if self.op.offline is not None:
12622 if self.op.offline:
12623 msg = "can't change to offline"
12624       else:
12625         msg = "can't change to online"
12626 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12628 # Pre-compute NIC changes (necessary to use result in hooks)
12629 self._nic_chgdesc = []
12630     if self.nicmod:
12631       # Operate on copies as this is still in prereq
12632       nics = [nic.Copy() for nic in instance.nics]
12633 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12634 self._CreateNewNic, self._ApplyNicMods, None)
12635 self._new_nics = nics
12636     else:
12637       self._new_nics = None
12639 def _ConvertPlainToDrbd(self, feedback_fn):
12640     """Converts an instance from plain to drbd.
12642     """
12643 feedback_fn("Converting template to drbd")
12644 instance = self.instance
12645 pnode = instance.primary_node
12646 snode = self.op.remote_node
12648 assert instance.disk_template == constants.DT_PLAIN
12650 # create a fake disk info for _GenerateDiskTemplate
12651 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12652 constants.IDISK_VG: d.logical_id[0]}
12653 for d in instance.disks]
12654 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12655 instance.name, pnode, [snode],
12656                                       disk_info, None, None, 0, feedback_fn,
12657                                       self.diskparams)
12658     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12659                                         self.diskparams)
12660 info = _GetInstanceInfoText(instance)
12661 feedback_fn("Creating additional volumes...")
12662 # first, create the missing data and meta devices
12663 for disk in anno_disks:
12664 # unfortunately this is... not too nice
12665       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12666                             info, True)
12667 for child in disk.children:
12668 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12669     # at this stage, all new LVs have been created, we can rename the
12670     # old ones
12671 feedback_fn("Renaming original volumes...")
12672 rename_list = [(o, n.children[0].logical_id)
12673 for (o, n) in zip(instance.disks, new_disks)]
12674 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12675 result.Raise("Failed to rename original LVs")
12677 feedback_fn("Initializing DRBD devices...")
12678 # all child devices are in place, we can now create the DRBD devices
12679 for disk in anno_disks:
12680 for node in [pnode, snode]:
12681 f_create = node == pnode
12682 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12684 # at this point, the instance has been modified
12685 instance.disk_template = constants.DT_DRBD8
12686 instance.disks = new_disks
12687 self.cfg.Update(instance, feedback_fn)
12689 # Release node locks while waiting for sync
12690 _ReleaseLocks(self, locking.LEVEL_NODE)
12692 # disks are created, waiting for sync
12693 disk_abort = not _WaitForSync(self, instance,
12694                                   oneshot=not self.op.wait_for_sync)
12695     if disk_abort:
12696 raise errors.OpExecError("There are some degraded disks for"
12697 " this instance, please cleanup manually")
12699 # Node resource locks will be released by caller
12701 def _ConvertDrbdToPlain(self, feedback_fn):
12702 """Converts an instance from drbd to plain.
12705 instance = self.instance
12707 assert len(instance.secondary_nodes) == 1
12708 assert instance.disk_template == constants.DT_DRBD8
12710 pnode = instance.primary_node
12711 snode = instance.secondary_nodes[0]
12712 feedback_fn("Converting template to plain")
12714 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12715 new_disks = [d.children[0] for d in instance.disks]
12717 # copy over size and mode
12718 for parent, child in zip(old_disks, new_disks):
12719 child.size = parent.size
12720 child.mode = parent.mode
12722 # this is a DRBD disk, return its port to the pool
12723 # NOTE: this must be done right before the call to cfg.Update!
12724 for disk in old_disks:
12725 tcp_port = disk.logical_id[2]
12726 self.cfg.AddTcpUdpPort(tcp_port)
12728 # update instance structure
12729 instance.disks = new_disks
12730 instance.disk_template = constants.DT_PLAIN
12731 self.cfg.Update(instance, feedback_fn)
12733 # Release locks in case removing disks takes a while
12734 _ReleaseLocks(self, locking.LEVEL_NODE)
12736 feedback_fn("Removing volumes on the secondary node...")
12737 for disk in old_disks:
12738 self.cfg.SetDiskID(disk, snode)
12739 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12740 if msg:
12741 self.LogWarning("Could not remove block device %s on node %s,"
12742 " continuing anyway: %s", disk.iv_name, snode, msg)
12744 feedback_fn("Removing unneeded volumes on the primary node...")
12745 for idx, disk in enumerate(old_disks):
12746 meta = disk.children[1]
12747 self.cfg.SetDiskID(meta, pnode)
12748 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12749 if msg:
12750 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12751 " continuing anyway: %s", idx, pnode, msg)
12753 def _CreateNewDisk(self, idx, params, _):
12754 """Creates a new disk.
12757 instance = self.instance
12760 if instance.disk_template in constants.DTS_FILEBASED:
12761 (file_driver, file_path) = instance.disks[0].logical_id
12762 file_path = os.path.dirname(file_path)
12763 else:
12764 file_driver = file_path = None
12766 disk = \
12767 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12768 instance.primary_node, instance.secondary_nodes,
12769 [params], file_path, file_driver, idx,
12770 self.Log, self.diskparams)[0]
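# Only the first element is kept: the template generator returns a list of
# disk objects even when, as here, a single [params] entry is passed in.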
12772 info = _GetInstanceInfoText(instance)
12774 logging.info("Creating volume %s for instance %s",
12775 disk.iv_name, instance.name)
12776 # Note: this needs to be kept in sync with _CreateDisks
12778 for node in instance.all_nodes:
12779 f_create = (node == instance.primary_node)
12780 try:
12781 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12782 except errors.OpExecError, err:
12783 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12784 disk.iv_name, disk, node, err)
12787 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12791 def _ModifyDisk(idx, disk, params, _):
12792 """Modifies a disk.
12795 disk.mode = params[constants.IDISK_MODE]
12798 ("disk.mode/%d" % idx, disk.mode),
12801 def _RemoveDisk(self, idx, root, _):
12805 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12806 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12807 self.cfg.SetDiskID(disk, node)
12808 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12809 if msg:
12810 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12811 " continuing anyway", idx, node, msg)
12813 # if this is a DRBD disk, return its port to the pool
12814 if root.dev_type in constants.LDS_DRBD:
12815 self.cfg.AddTcpUdpPort(root.logical_id[2])
12818 def _CreateNewNic(idx, params, private):
12819 """Creates data structure for a new network interface.
12822 mac = params[constants.INIC_MAC]
12823 ip = params.get(constants.INIC_IP, None)
12824 nicparams = private.params
12826 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12828 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12829 (mac, ip, private.filled[constants.NIC_MODE],
12830 private.filled[constants.NIC_LINK])),
12834 def _ApplyNicMods(idx, nic, params, private):
12835 """Modifies a network interface.
12840 for key in [constants.INIC_MAC, constants.INIC_IP]:
12842 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12843 setattr(nic, key, params[key])
12846 nic.nicparams = private.params
12848 for (key, val) in params.items():
12849 changes.append(("nic.%s/%d" % (key, idx), val))
12853 def Exec(self, feedback_fn):
12854 """Modifies an instance.
12856 All parameters take effect only at the next restart of the instance.
12859 # Process here the warnings from CheckPrereq, as we don't have a
12860 # feedback_fn there.
12861 # TODO: Replace with self.LogWarning
12862 for warn in self.warn:
12863 feedback_fn("WARNING: %s" % warn)
12865 assert ((self.op.disk_template is None) ^
12866 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12867 "Not owning any node resource locks"
12869 result = []
12870 instance = self.instance
12873 if self.op.runtime_mem:
12874 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12876 self.op.runtime_mem)
12877 rpcres.Raise("Cannot modify instance runtime memory")
12878 result.append(("runtime_memory", self.op.runtime_mem))
12880 # Apply disk changes
12881 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12882 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12883 _UpdateIvNames(0, instance.disks)
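# Disk template conversion (plain <-> drbd): verify that the needed node and
# node-resource locks are held, shut down the instance disks, then run the
# matching converter from _DISK_CONVERSIONS below.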
12885 if self.op.disk_template:
12887 check_nodes = set(instance.all_nodes)
12888 if self.op.remote_node:
12889 check_nodes.add(self.op.remote_node)
12890 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12891 owned = self.owned_locks(level)
12892 assert not (check_nodes - owned), \
12893 ("Not owning the correct locks, owning %r, expected at least %r" %
12894 (owned, check_nodes))
12896 r_shut = _ShutdownInstanceDisks(self, instance)
12897 if not r_shut:
12898 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12899 " proceed with disk template conversion")
12900 mode = (instance.disk_template, self.op.disk_template)
12901 try:
12902 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12903 except:
12904 self.cfg.ReleaseDRBDMinors(instance.name)
12905 raise
12906 result.append(("disk_template", self.op.disk_template))
12908 assert instance.disk_template == self.op.disk_template, \
12909 ("Expected disk template '%s', found '%s'" %
12910 (self.op.disk_template, instance.disk_template))
12912 # Release node and resource locks if there are any (they might already have
12913 # been released during disk conversion)
12914 _ReleaseLocks(self, locking.LEVEL_NODE)
12915 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12917 # Apply NIC changes
12918 if self._new_nics is not None:
12919 instance.nics = self._new_nics
12920 result.extend(self._nic_chgdesc)
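# The hvparams/beparams/os/osparams updates below use the hv_inst, be_inst
# and os_inst dictionaries computed during the prerequisite checks; only the
# keys actually passed in the opcode are reported in the result list.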
12923 if self.op.hvparams:
12924 instance.hvparams = self.hv_inst
12925 for key, val in self.op.hvparams.iteritems():
12926 result.append(("hv/%s" % key, val))
12929 if self.op.beparams:
12930 instance.beparams = self.be_inst
12931 for key, val in self.op.beparams.iteritems():
12932 result.append(("be/%s" % key, val))
12935 if self.op.os_name:
12936 instance.os = self.op.os_name
12939 if self.op.osparams:
12940 instance.osparams = self.os_inst
12941 for key, val in self.op.osparams.iteritems():
12942 result.append(("os/%s" % key, val))
12944 if self.op.offline is None:
12945 # Ignore
12946 pass
12947 elif self.op.offline:
12948 # Mark instance as offline
12949 self.cfg.MarkInstanceOffline(instance.name)
12950 result.append(("admin_state", constants.ADMINST_OFFLINE))
12951 else:
12952 # Mark instance as online, but stopped
12953 self.cfg.MarkInstanceDown(instance.name)
12954 result.append(("admin_state", constants.ADMINST_DOWN))
12956 self.cfg.Update(instance, feedback_fn)
12958 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12959 self.owned_locks(locking.LEVEL_NODE)), \
12960 "All node locks should have been released by now"
12962 return result
12964 _DISK_CONVERSIONS = {
12965 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12966 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12967 }
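# For reference, these converters are reached through the instance set-params
# opcode; on the command line that is roughly "gnt-instance modify -t drbd
# -n <new-secondary> <instance>" for the plain-to-drbd direction (shown as an
# illustration only).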
12970 class LUInstanceChangeGroup(LogicalUnit):
12971 HPATH = "instance-change-group"
12972 HTYPE = constants.HTYPE_INSTANCE
12975 def ExpandNames(self):
12976 self.share_locks = _ShareAll()
12977 self.needed_locks = {
12978 locking.LEVEL_NODEGROUP: [],
12979 locking.LEVEL_NODE: [],
12982 self._ExpandAndLockInstance()
12984 if self.op.target_groups:
12985 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12986 self.op.target_groups)
12987 else:
12988 self.req_target_uuids = None
12990 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12992 def DeclareLocks(self, level):
12993 if level == locking.LEVEL_NODEGROUP:
12994 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12996 if self.req_target_uuids:
12997 lock_groups = set(self.req_target_uuids)
12999 # Lock all groups used by instance optimistically; this requires going
13000 # via the node before it's locked, requiring verification later on
13001 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13002 lock_groups.update(instance_groups)
13003 else:
13004 # No target groups, need to lock all of them
13005 lock_groups = locking.ALL_SET
13007 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13009 elif level == locking.LEVEL_NODE:
13010 if self.req_target_uuids:
13011 # Lock all nodes used by instances
13012 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13013 self._LockInstancesNodes()
13015 # Lock all nodes in all potential target groups
13016 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13017 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13018 member_nodes = [node_name
13019 for group in lock_groups
13020 for node_name in self.cfg.GetNodeGroup(group).members]
13021 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13022 else:
13023 # Lock all nodes as all groups are potential targets
13024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13026 def CheckPrereq(self):
13027 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13028 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13029 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13031 assert (self.req_target_uuids is None or
13032 owned_groups.issuperset(self.req_target_uuids))
13033 assert owned_instances == set([self.op.instance_name])
13035 # Get instance information
13036 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13038 # Check if node groups for locked instance are still correct
13039 assert owned_nodes.issuperset(self.instance.all_nodes), \
13040 ("Instance %s's nodes changed while we kept the lock" %
13041 self.op.instance_name)
13043 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13046 if self.req_target_uuids:
13047 # User requested specific target groups
13048 self.target_uuids = frozenset(self.req_target_uuids)
13049 else:
13050 # All groups except those used by the instance are potential targets
13051 self.target_uuids = owned_groups - inst_groups
13053 conflicting_groups = self.target_uuids & inst_groups
13054 if conflicting_groups:
13055 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13056 " used by the instance '%s'" %
13057 (utils.CommaJoin(conflicting_groups),
13058 self.op.instance_name),
13059 errors.ECODE_INVAL)
13061 if not self.target_uuids:
13062 raise errors.OpPrereqError("There are no possible target groups",
13063 errors.ECODE_INVAL)
13065 def BuildHooksEnv(self):
13066 """Build hooks env.
13069 assert self.target_uuids
13072 "TARGET_GROUPS": " ".join(self.target_uuids),
13075 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13079 def BuildHooksNodes(self):
13080 """Build hooks nodes.
13083 mn = self.cfg.GetMasterNode()
13084 return ([mn], [mn])
13086 def Exec(self, feedback_fn):
13087 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13089 assert instances == [self.op.instance_name], "Instance not locked"
13091 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13092 instances=instances, target_groups=list(self.target_uuids))
13094 ial.Run(self.op.iallocator)
13096 if not ial.success:
13097 raise errors.OpPrereqError("Can't compute solution for changing group of"
13098 " instance '%s' using iallocator '%s': %s" %
13099 (self.op.instance_name, self.op.iallocator,
13100 ial.info),
13101 errors.ECODE_NORES)
13103 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13105 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13106 " instance '%s'", len(jobs), self.op.instance_name)
13108 return ResultWithJobs(jobs)
13111 class LUBackupQuery(NoHooksLU):
13112 """Query the exports list
13117 def CheckArguments(self):
13118 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13119 ["node", "export"], self.op.use_locking)
13121 def ExpandNames(self):
13122 self.expq.ExpandNames(self)
13124 def DeclareLocks(self, level):
13125 self.expq.DeclareLocks(self, level)
13127 def Exec(self, feedback_fn):
13128 result = {}
13130 for (node, expname) in self.expq.OldStyleQuery(self):
13131 if expname is None:
13132 result[node] = False
13133 else:
13134 result.setdefault(node, []).append(expname)
13136 return result
13139 class _ExportQuery(_QueryBase):
13140 FIELDS = query.EXPORT_FIELDS
13142 #: The node name is not a unique key for this query
13143 SORT_FIELD = "node"
13145 def ExpandNames(self, lu):
13146 lu.needed_locks = {}
13148 # The following variables interact with _QueryBase._GetNames
13149 if self.names:
13150 self.wanted = _GetWantedNodes(lu, self.names)
13151 else:
13152 self.wanted = locking.ALL_SET
13154 self.do_locking = self.use_locking
13156 if self.do_locking:
13157 lu.share_locks = _ShareAll()
13158 lu.needed_locks = {
13159 locking.LEVEL_NODE: self.wanted,
13162 def DeclareLocks(self, lu, level):
13165 def _GetQueryData(self, lu):
13166 """Computes the list of nodes and their attributes.
13169 # Locking is not used
13171 assert not (compat.any(lu.glm.is_owned(level)
13172 for level in locking.LEVELS
13173 if level != locking.LEVEL_CLUSTER) or
13174 self.do_locking or self.use_locking)
13176 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
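# Build a flat list of (node, export_name) pairs; nodes that could not be
# queried are recorded as (node, None).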
13178 result = []
13180 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13181 if nres.fail_msg:
13182 result.append((node, None))
13183 else:
13184 result.extend((node, expname) for expname in nres.payload)
13186 return result
13189 class LUBackupPrepare(NoHooksLU):
13190 """Prepares an instance for an export and returns useful information.
13195 def ExpandNames(self):
13196 self._ExpandAndLockInstance()
13198 def CheckPrereq(self):
13199 """Check prerequisites.
13202 instance_name = self.op.instance_name
13204 self.instance = self.cfg.GetInstanceInfo(instance_name)
13205 assert self.instance is not None, \
13206 "Cannot retrieve locked instance %s" % self.op.instance_name
13207 _CheckNodeOnline(self, self.instance.primary_node)
13209 self._cds = _GetClusterDomainSecret()
13211 def Exec(self, feedback_fn):
13212 """Prepares an instance for an export.
13215 instance = self.instance
13217 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13218 salt = utils.GenerateSecret(8)
13220 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13221 result = self.rpc.call_x509_cert_create(instance.primary_node,
13222 constants.RIE_CERT_VALIDITY)
13223 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13225 (name, cert_pem) = result.payload
13227 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13231 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13232 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13234 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13240 class LUBackupExport(LogicalUnit):
13241 """Export an instance to an image in the cluster.
13244 HPATH = "instance-export"
13245 HTYPE = constants.HTYPE_INSTANCE
13248 def CheckArguments(self):
13249 """Check the arguments.
13252 self.x509_key_name = self.op.x509_key_name
13253 self.dest_x509_ca_pem = self.op.destination_x509_ca
13255 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13256 if not self.x509_key_name:
13257 raise errors.OpPrereqError("Missing X509 key name for encryption",
13258 errors.ECODE_INVAL)
13260 if not self.dest_x509_ca_pem:
13261 raise errors.OpPrereqError("Missing destination X509 CA",
13262 errors.ECODE_INVAL)
13264 def ExpandNames(self):
13265 self._ExpandAndLockInstance()
13267 # Lock all nodes for local exports
13268 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13269 # FIXME: lock only instance primary and destination node
13271 # Sad but true, for now we have to lock all nodes, as we don't know where
13272 # the previous export might be, and in this LU we search for it and
13273 # remove it from its current node. In the future we could fix this by:
13274 # - making a tasklet to search (share-lock all), then create the
13275 # new one, then one to remove, after
13276 # - removing the removal operation altogether
13277 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13279 def DeclareLocks(self, level):
13280 """Last minute lock declaration."""
13281 # All nodes are locked anyway, so nothing to do here.
13283 def BuildHooksEnv(self):
13284 """Build hooks env.
13286 This will run on the master, primary node and target node.
13290 "EXPORT_MODE": self.op.mode,
13291 "EXPORT_NODE": self.op.target_node,
13292 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13293 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13294 # TODO: Generic function for boolean env variables
13295 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13298 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13302 def BuildHooksNodes(self):
13303 """Build hooks nodes.
13306 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13308 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13309 nl.append(self.op.target_node)
13313 def CheckPrereq(self):
13314 """Check prerequisites.
13316 This checks that the instance and node names are valid.
13319 instance_name = self.op.instance_name
13321 self.instance = self.cfg.GetInstanceInfo(instance_name)
13322 assert self.instance is not None, \
13323 "Cannot retrieve locked instance %s" % self.op.instance_name
13324 _CheckNodeOnline(self, self.instance.primary_node)
13326 if (self.op.remove_instance and
13327 self.instance.admin_state == constants.ADMINST_UP and
13328 not self.op.shutdown):
13329 raise errors.OpPrereqError("Can not remove instance without shutting it"
13330 " down before export",
13331 errors.ECODE_STATE)
13332 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13333 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13334 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13335 assert self.dst_node is not None
13337 _CheckNodeOnline(self, self.dst_node.name)
13338 _CheckNodeNotDrained(self, self.dst_node.name)
13341 self.dest_disk_info = None
13342 self.dest_x509_ca = None
13344 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13345 self.dst_node = None
13347 if len(self.op.target_node) != len(self.instance.disks):
13348 raise errors.OpPrereqError(("Received destination information for %s"
13349 " disks, but instance %s has %s disks") %
13350 (len(self.op.target_node), instance_name,
13351 len(self.instance.disks)),
13352 errors.ECODE_INVAL)
13354 cds = _GetClusterDomainSecret()
13356 # Check X509 key name
13357 try:
13358 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13359 except (TypeError, ValueError), err:
13360 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13362 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13363 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13364 errors.ECODE_INVAL)
13366 # Load and verify CA
13367 try:
13368 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13369 except OpenSSL.crypto.Error, err:
13370 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13371 (err, ), errors.ECODE_INVAL)
13373 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13374 if errcode is not None:
13375 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13376 (msg, ), errors.ECODE_INVAL)
13378 self.dest_x509_ca = cert
13380 # Verify target information
13382 for idx, disk_data in enumerate(self.op.target_node):
13384 (host, port, magic) = \
13385 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13386 except errors.GenericError, err:
13387 raise errors.OpPrereqError("Target info for disk %s: %s" %
13388 (idx, err), errors.ECODE_INVAL)
13390 disk_info.append((host, port, magic))
13392 assert len(disk_info) == len(self.op.target_node)
13393 self.dest_disk_info = disk_info
13396 raise errors.ProgrammerError("Unhandled export mode %r" %
13399 # instance disk type verification
13400 # TODO: Implement export support for file-based disks
13401 for disk in self.instance.disks:
13402 if disk.dev_type == constants.LD_FILE:
13403 raise errors.OpPrereqError("Export not supported for instances with"
13404 " file-based disks", errors.ECODE_INVAL)
13406 def _CleanupExports(self, feedback_fn):
13407 """Removes exports of current instance from all other nodes.
13409 If an instance in a cluster with nodes A..D was exported to node C, its
13410 exports will be removed from the nodes A, B and D.
13413 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13415 nodelist = self.cfg.GetNodeList()
13416 nodelist.remove(self.dst_node.name)
13418 # on one-node clusters nodelist will be empty after the removal
13419 # if we proceed the backup would be removed because OpBackupQuery
13420 # substitutes an empty list with the full cluster node list.
13421 iname = self.instance.name
13422 if nodelist:
13423 feedback_fn("Removing old exports for instance %s" % iname)
13424 exportlist = self.rpc.call_export_list(nodelist)
13425 for node in exportlist:
13426 if exportlist[node].fail_msg:
13427 continue
13428 if iname in exportlist[node].payload:
13429 msg = self.rpc.call_export_remove(node, iname).fail_msg
13430 if msg:
13431 self.LogWarning("Could not remove older export for instance %s"
13432 " on node %s: %s", iname, node, msg)
13434 def Exec(self, feedback_fn):
13435 """Export an instance to an image in the cluster.
13438 assert self.op.mode in constants.EXPORT_MODES
13440 instance = self.instance
13441 src_node = instance.primary_node
13443 if self.op.shutdown:
13444 # shutdown the instance, but not the disks
13445 feedback_fn("Shutting down instance %s" % instance.name)
13446 result = self.rpc.call_instance_shutdown(src_node, instance,
13447 self.op.shutdown_timeout)
13448 # TODO: Maybe ignore failures if ignore_remove_failures is set
13449 result.Raise("Could not shutdown instance %s on"
13450 " node %s" % (instance.name, src_node))
13452 # set the disks ID correctly since call_instance_start needs the
13453 # correct drbd minor to create the symlinks
13454 for disk in instance.disks:
13455 self.cfg.SetDiskID(disk, src_node)
13457 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13459 if activate_disks:
13460 # Activate the instance disks if we're exporting a stopped instance
13461 feedback_fn("Activating disks for %s" % instance.name)
13462 _StartInstanceDisks(self, instance, None)
13464 try:
13465 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13466 instance)
13468 helper.CreateSnapshots()
13469 try:
13470 if (self.op.shutdown and
13471 instance.admin_state == constants.ADMINST_UP and
13472 not self.op.remove_instance):
13473 assert not activate_disks
13474 feedback_fn("Starting instance %s" % instance.name)
13475 result = self.rpc.call_instance_start(src_node,
13476 (instance, None, None), False)
13477 msg = result.fail_msg
13478 if msg:
13479 feedback_fn("Failed to start instance: %s" % msg)
13480 _ShutdownInstanceDisks(self, instance)
13481 raise errors.OpExecError("Could not start instance: %s" % msg)
13483 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13484 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13485 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13486 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13487 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13489 (key_name, _, _) = self.x509_key_name
13491 dest_ca_pem = \
13492 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13493 self.dest_x509_ca)
13495 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13496 key_name, dest_ca_pem,
13497 timeouts)
13498 finally:
13499 helper.Cleanup()
13501 # Check for backwards compatibility
13502 assert len(dresults) == len(instance.disks)
13503 assert compat.all(isinstance(i, bool) for i in dresults), \
13504 "Not all results are boolean: %r" % dresults
13506 finally:
13507 if activate_disks:
13508 feedback_fn("Deactivating disks for %s" % instance.name)
13509 _ShutdownInstanceDisks(self, instance)
13511 if not (compat.all(dresults) and fin_resu):
13512 failures = []
13513 if not fin_resu:
13514 failures.append("export finalization")
13515 if not compat.all(dresults):
13516 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13517 if not dsk)
13518 failures.append("disk export: disk(s) %s" % fdsk)
13520 raise errors.OpExecError("Export failed, errors in %s" %
13521 utils.CommaJoin(failures))
13523 # At this point, the export was successful, we can cleanup/finish
13525 # Remove instance if requested
13526 if self.op.remove_instance:
13527 feedback_fn("Removing instance %s" % instance.name)
13528 _RemoveInstance(self, feedback_fn, instance,
13529 self.op.ignore_remove_failures)
13531 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13532 self._CleanupExports(feedback_fn)
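# The final value is the finalization status plus one boolean per disk, e.g.
# (True, [True, True]) for a fully successful export of a two-disk instance
# (example values for illustration only).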
13534 return fin_resu, dresults
13537 class LUBackupRemove(NoHooksLU):
13538 """Remove exports related to the named instance.
13543 def ExpandNames(self):
13544 self.needed_locks = {}
13545 # We need all nodes to be locked in order for RemoveExport to work, but we
13546 # don't need to lock the instance itself, as nothing will happen to it (and
13547 # we can remove exports also for a removed instance)
13548 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13550 def Exec(self, feedback_fn):
13551 """Remove any export.
13554 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13555 # If the instance was not found we'll try with the name that was passed in.
13556 # This will only work if it was an FQDN, though.
13557 fqdn_warn = False
13558 if not instance_name:
13559 fqdn_warn = True
13560 instance_name = self.op.instance_name
13562 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13563 exportlist = self.rpc.call_export_list(locked_nodes)
13564 found = False
13565 for node in exportlist:
13566 msg = exportlist[node].fail_msg
13567 if msg:
13568 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13569 continue
13570 if instance_name in exportlist[node].payload:
13571 found = True
13572 result = self.rpc.call_export_remove(node, instance_name)
13573 msg = result.fail_msg
13574 if msg:
13575 logging.error("Could not remove export for instance %s"
13576 " on node %s: %s", instance_name, node, msg)
13578 if fqdn_warn and not found:
13579 feedback_fn("Export not found. If trying to remove an export belonging"
13580 " to a deleted instance please use its Fully Qualified"
13581 " Domain Name.")
13584 class LUGroupAdd(LogicalUnit):
13585 """Logical unit for creating node groups.
13588 HPATH = "group-add"
13589 HTYPE = constants.HTYPE_GROUP
13592 def ExpandNames(self):
13593 # We need the new group's UUID here so that we can create and acquire the
13594 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13595 # that it should not check whether the UUID exists in the configuration.
13596 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13597 self.needed_locks = {}
13598 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13600 def CheckPrereq(self):
13601 """Check prerequisites.
13603 This checks that the given group name is not an existing node group
13607 try:
13608 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13609 except errors.OpPrereqError:
13610 pass
13611 else:
13612 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13613 " node group (UUID: %s)" %
13614 (self.op.group_name, existing_uuid),
13615 errors.ECODE_EXISTS)
13617 if self.op.ndparams:
13618 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13620 if self.op.hv_state:
13621 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13622 else:
13623 self.new_hv_state = None
13625 if self.op.disk_state:
13626 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13627 else:
13628 self.new_disk_state = None
13630 if self.op.diskparams:
13631 for templ in constants.DISK_TEMPLATES:
13632 if templ in self.op.diskparams:
13633 utils.ForceDictType(self.op.diskparams[templ],
13634 constants.DISK_DT_TYPES)
13635 self.new_diskparams = self.op.diskparams
13636 try:
13637 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13638 except errors.OpPrereqError, err:
13639 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13640 errors.ECODE_INVAL)
13641 else:
13642 self.new_diskparams = {}
13644 if self.op.ipolicy:
13645 cluster = self.cfg.GetClusterInfo()
13646 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13647 try:
13648 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13649 except errors.ConfigurationError, err:
13650 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13651 errors.ECODE_INVAL)
13653 def BuildHooksEnv(self):
13654 """Build hooks env.
13657 return {
13658 "GROUP_NAME": self.op.group_name,
13659 }
13661 def BuildHooksNodes(self):
13662 """Build hooks nodes.
13665 mn = self.cfg.GetMasterNode()
13666 return ([mn], [mn])
13668 def Exec(self, feedback_fn):
13669 """Add the node group to the cluster.
13672 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13673 uuid=self.group_uuid,
13674 alloc_policy=self.op.alloc_policy,
13675 ndparams=self.op.ndparams,
13676 diskparams=self.new_diskparams,
13677 ipolicy=self.op.ipolicy,
13678 hv_state_static=self.new_hv_state,
13679 disk_state_static=self.new_disk_state)
13681 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13682 del self.remove_locks[locking.LEVEL_NODEGROUP]
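# This LU is normally reached through the "gnt-group add" command; the
# ndparams, diskparams, ipolicy and hv/disk state values validated in
# CheckPrereq all come from the submitted opcode.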
13685 class LUGroupAssignNodes(NoHooksLU):
13686 """Logical unit for assigning nodes to groups.
13691 def ExpandNames(self):
13692 # These raise errors.OpPrereqError on their own:
13693 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13694 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13696 # We want to lock all the affected nodes and groups. We have readily
13697 # available the list of nodes, and the *destination* group. To gather the
13698 # list of "source" groups, we need to fetch node information later on.
13699 self.needed_locks = {
13700 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13701 locking.LEVEL_NODE: self.op.nodes,
13704 def DeclareLocks(self, level):
13705 if level == locking.LEVEL_NODEGROUP:
13706 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13708 # Try to get all affected nodes' groups without having the group or node
13709 # lock yet. Needs verification later in the code flow.
13710 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13712 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13714 def CheckPrereq(self):
13715 """Check prerequisites.
13718 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13719 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13720 frozenset(self.op.nodes))
13722 expected_locks = (set([self.group_uuid]) |
13723 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13724 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13725 if actual_locks != expected_locks:
13726 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13727 " current groups are '%s', used to be '%s'" %
13728 (utils.CommaJoin(expected_locks),
13729 utils.CommaJoin(actual_locks)))
13731 self.node_data = self.cfg.GetAllNodesInfo()
13732 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13733 instance_data = self.cfg.GetAllInstancesInfo()
13735 if self.group is None:
13736 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13737 (self.op.group_name, self.group_uuid))
13739 (new_splits, previous_splits) = \
13740 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13741 for node in self.op.nodes],
13742 self.node_data, instance_data)
13744 if new_splits:
13745 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13747 if not self.op.force:
13748 raise errors.OpExecError("The following instances get split by this"
13749 " change and --force was not given: %s" %
13750 fmt_new_splits)
13751 else:
13752 self.LogWarning("This operation will split the following instances: %s",
13753 fmt_new_splits)
13755 if previous_splits:
13756 self.LogWarning("In addition, these already-split instances continue"
13757 " to be split across groups: %s",
13758 utils.CommaJoin(utils.NiceSort(previous_splits)))
13760 def Exec(self, feedback_fn):
13761 """Assign nodes to a new group.
13764 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13766 self.cfg.AssignGroupNodes(mods)
13769 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13770 """Check for split instances after a node assignment.
13772 This method considers a series of node assignments as an atomic operation,
13773 and returns information about split instances after applying the set of
13776 In particular, it returns information about newly split instances, and
13777 instances that were already split, and remain so after the change.
13779 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13782 @type changes: list of (node_name, new_group_uuid) pairs.
13783 @param changes: list of node assignments to consider.
13784 @param node_data: a dict with data for all nodes
13785 @param instance_data: a dict with all instances to consider
13786 @rtype: a two-tuple
13787 @return: a list of instances that were previously okay and result split as a
13788 consequence of this change, and a list of instances that were previously
13789 split and this change does not fix.
13792 changed_nodes = dict((node, group) for node, group in changes
13793 if node_data[node].group != group)
13795 all_split_instances = set()
13796 previously_split_instances = set()
13798 def InstanceNodes(instance):
13799 return [instance.primary_node] + list(instance.secondary_nodes)
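# Example (hypothetical names): a DRBD instance on nodes "n1" and "n2" that
# currently share a group is reported in the first returned list if the
# changes move only one of the two nodes to another group; if its nodes are
# already in different groups and stay split, it ends up in the second list.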
13801 for inst in instance_data.values():
13802 if inst.disk_template not in constants.DTS_INT_MIRROR:
13803 continue
13805 instance_nodes = InstanceNodes(inst)
13807 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13808 previously_split_instances.add(inst.name)
13810 if len(set(changed_nodes.get(node, node_data[node].group)
13811 for node in instance_nodes)) > 1:
13812 all_split_instances.add(inst.name)
13814 return (list(all_split_instances - previously_split_instances),
13815 list(previously_split_instances & all_split_instances))
13818 class _GroupQuery(_QueryBase):
13819 FIELDS = query.GROUP_FIELDS
13821 def ExpandNames(self, lu):
13822 lu.needed_locks = {}
13824 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13825 self._cluster = lu.cfg.GetClusterInfo()
13826 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13829 self.wanted = [name_to_uuid[name]
13830 for name in utils.NiceSort(name_to_uuid.keys())]
13832 # Accept names to be either names or UUIDs.
13835 all_uuid = frozenset(self._all_groups.keys())
13837 for name in self.names:
13838 if name in all_uuid:
13839 self.wanted.append(name)
13840 elif name in name_to_uuid:
13841 self.wanted.append(name_to_uuid[name])
13843 missing.append(name)
13846 raise errors.OpPrereqError("Some groups do not exist: %s" %
13847 utils.CommaJoin(missing),
13848 errors.ECODE_NOENT)
13850 def DeclareLocks(self, lu, level):
13853 def _GetQueryData(self, lu):
13854 """Computes the list of node groups and their attributes.
13857 do_nodes = query.GQ_NODE in self.requested_data
13858 do_instances = query.GQ_INST in self.requested_data
13860 group_to_nodes = None
13861 group_to_instances = None
13863 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13864 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13865 # latter GetAllInstancesInfo() is not enough, for we have to go through
13866 # instance->node. Hence, we will need to process nodes even if we only need
13867 # instance information.
13868 if do_nodes or do_instances:
13869 all_nodes = lu.cfg.GetAllNodesInfo()
13870 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13871 node_to_group = {}
13873 for node in all_nodes.values():
13874 if node.group in group_to_nodes:
13875 group_to_nodes[node.group].append(node.name)
13876 node_to_group[node.name] = node.group
13878 if do_instances:
13879 all_instances = lu.cfg.GetAllInstancesInfo()
13880 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13882 for instance in all_instances.values():
13883 node = instance.primary_node
13884 if node in node_to_group:
13885 group_to_instances[node_to_group[node]].append(instance.name)
13887 if not do_nodes:
13888 # Do not pass on node information if it was not requested.
13889 group_to_nodes = None
13891 return query.GroupQueryData(self._cluster,
13892 [self._all_groups[uuid]
13893 for uuid in self.wanted],
13894 group_to_nodes, group_to_instances,
13895 query.GQ_DISKPARAMS in self.requested_data)
13898 class LUGroupQuery(NoHooksLU):
13899 """Logical unit for querying node groups.
13904 def CheckArguments(self):
13905 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13906 self.op.output_fields, False)
13908 def ExpandNames(self):
13909 self.gq.ExpandNames(self)
13911 def DeclareLocks(self, level):
13912 self.gq.DeclareLocks(self, level)
13914 def Exec(self, feedback_fn):
13915 return self.gq.OldStyleQuery(self)
13918 class LUGroupSetParams(LogicalUnit):
13919 """Modifies the parameters of a node group.
13922 HPATH = "group-modify"
13923 HTYPE = constants.HTYPE_GROUP
13926 def CheckArguments(self):
13929 self.op.diskparams,
13930 self.op.alloc_policy,
13932 self.op.disk_state,
13936 if all_changes.count(None) == len(all_changes):
13937 raise errors.OpPrereqError("Please pass at least one modification",
13938 errors.ECODE_INVAL)
13940 def ExpandNames(self):
13941 # This raises errors.OpPrereqError on its own:
13942 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13944 self.needed_locks = {
13945 locking.LEVEL_INSTANCE: [],
13946 locking.LEVEL_NODEGROUP: [self.group_uuid],
13949 self.share_locks[locking.LEVEL_INSTANCE] = 1
13951 def DeclareLocks(self, level):
13952 if level == locking.LEVEL_INSTANCE:
13953 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13955 # Lock instances optimistically, needs verification once group lock has
13957 self.needed_locks[locking.LEVEL_INSTANCE] = \
13958 self.cfg.GetNodeGroupInstances(self.group_uuid)
13961 def _UpdateAndVerifyDiskParams(old, new):
13962 """Updates and verifies disk parameters.
13965 new_params = _GetUpdatedParams(old, new)
13966 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13967 return new_params
13969 def CheckPrereq(self):
13970 """Check prerequisites.
13973 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13975 # Check if locked instances are still correct
13976 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13978 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13979 cluster = self.cfg.GetClusterInfo()
13981 if self.group is None:
13982 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13983 (self.op.group_name, self.group_uuid))
13985 if self.op.ndparams:
13986 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13987 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13988 self.new_ndparams = new_ndparams
13990 if self.op.diskparams:
13991 diskparams = self.group.diskparams
13992 uavdp = self._UpdateAndVerifyDiskParams
13993 # For each disktemplate subdict update and verify the values
13994 new_diskparams = dict((dt,
13995 uavdp(diskparams.get(dt, {}),
13996 self.op.diskparams[dt]))
13997 for dt in constants.DISK_TEMPLATES
13998 if dt in self.op.diskparams)
13999 # As we've all subdicts of diskparams ready, lets merge the actual
14000 # dict with all updated subdicts
14001 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14002 try:
14003 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14004 except errors.OpPrereqError, err:
14005 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14006 errors.ECODE_INVAL)
14008 if self.op.hv_state:
14009 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14010 self.group.hv_state_static)
14012 if self.op.disk_state:
14013 self.new_disk_state = \
14014 _MergeAndVerifyDiskState(self.op.disk_state,
14015 self.group.disk_state_static)
14017 if self.op.ipolicy:
14018 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14022 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14023 inst_filter = lambda inst: inst.name in owned_instances
14024 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14026 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14028 new_ipolicy, instances)
14031 self.LogWarning("After the ipolicy change the following instances"
14032 " violate them: %s",
14033 utils.CommaJoin(violations))
14035 def BuildHooksEnv(self):
14036 """Build hooks env.
14039 return {
14040 "GROUP_NAME": self.op.group_name,
14041 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14042 }
14044 def BuildHooksNodes(self):
14045 """Build hooks nodes.
14048 mn = self.cfg.GetMasterNode()
14049 return ([mn], [mn])
14051 def Exec(self, feedback_fn):
14052 """Modifies the node group.
14057 if self.op.ndparams:
14058 self.group.ndparams = self.new_ndparams
14059 result.append(("ndparams", str(self.group.ndparams)))
14061 if self.op.diskparams:
14062 self.group.diskparams = self.new_diskparams
14063 result.append(("diskparams", str(self.group.diskparams)))
14065 if self.op.alloc_policy:
14066 self.group.alloc_policy = self.op.alloc_policy
14068 if self.op.hv_state:
14069 self.group.hv_state_static = self.new_hv_state
14071 if self.op.disk_state:
14072 self.group.disk_state_static = self.new_disk_state
14074 if self.op.ipolicy:
14075 self.group.ipolicy = self.new_ipolicy
14077 self.cfg.Update(self.group, feedback_fn)
14081 class LUGroupRemove(LogicalUnit):
14082 HPATH = "group-remove"
14083 HTYPE = constants.HTYPE_GROUP
14086 def ExpandNames(self):
14087 # This will raise errors.OpPrereqError on its own:
14088 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14089 self.needed_locks = {
14090 locking.LEVEL_NODEGROUP: [self.group_uuid],
14093 def CheckPrereq(self):
14094 """Check prerequisites.
14096 This checks that the given group name exists as a node group, that is
14097 empty (i.e., contains no nodes), and that is not the last group of the
14101 # Verify that the group is empty.
14102 group_nodes = [node.name
14103 for node in self.cfg.GetAllNodesInfo().values()
14104 if node.group == self.group_uuid]
14106 if group_nodes:
14107 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14108 " nodes: %s" %
14109 (self.op.group_name,
14110 utils.CommaJoin(utils.NiceSort(group_nodes))),
14111 errors.ECODE_STATE)
14113 # Verify the cluster would not be left group-less.
14114 if len(self.cfg.GetNodeGroupList()) == 1:
14115 raise errors.OpPrereqError("Group '%s' is the only group,"
14116 " cannot be removed" %
14117 self.op.group_name,
14118 errors.ECODE_STATE)
14120 def BuildHooksEnv(self):
14121 """Build hooks env.
14124 return {
14125 "GROUP_NAME": self.op.group_name,
14126 }
14128 def BuildHooksNodes(self):
14129 """Build hooks nodes.
14132 mn = self.cfg.GetMasterNode()
14133 return ([mn], [mn])
14135 def Exec(self, feedback_fn):
14136 """Remove the node group.
14139 try:
14140 self.cfg.RemoveNodeGroup(self.group_uuid)
14141 except errors.ConfigurationError:
14142 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14143 (self.op.group_name, self.group_uuid))
14145 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14148 class LUGroupRename(LogicalUnit):
14149 HPATH = "group-rename"
14150 HTYPE = constants.HTYPE_GROUP
14153 def ExpandNames(self):
14154 # This raises errors.OpPrereqError on its own:
14155 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14157 self.needed_locks = {
14158 locking.LEVEL_NODEGROUP: [self.group_uuid],
14161 def CheckPrereq(self):
14162 """Check prerequisites.
14164 Ensures requested new name is not yet used.
14167 try:
14168 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14169 except errors.OpPrereqError:
14170 pass
14171 else:
14172 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14173 " node group (UUID: %s)" %
14174 (self.op.new_name, new_name_uuid),
14175 errors.ECODE_EXISTS)
14177 def BuildHooksEnv(self):
14178 """Build hooks env.
14181 return {
14182 "OLD_NAME": self.op.group_name,
14183 "NEW_NAME": self.op.new_name,
14184 }
14186 def BuildHooksNodes(self):
14187 """Build hooks nodes.
14190 mn = self.cfg.GetMasterNode()
14192 all_nodes = self.cfg.GetAllNodesInfo()
14193 all_nodes.pop(mn, None)
14195 run_nodes = [mn]
14196 run_nodes.extend(node.name for node in all_nodes.values()
14197 if node.group == self.group_uuid)
14199 return (run_nodes, run_nodes)
14201 def Exec(self, feedback_fn):
14202 """Rename the node group.
14205 group = self.cfg.GetNodeGroup(self.group_uuid)
14207 if group is None:
14208 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14209 (self.op.group_name, self.group_uuid))
14211 group.name = self.op.new_name
14212 self.cfg.Update(group, feedback_fn)
14214 return self.op.new_name
14217 class LUGroupEvacuate(LogicalUnit):
14218 HPATH = "group-evacuate"
14219 HTYPE = constants.HTYPE_GROUP
14222 def ExpandNames(self):
14223 # This raises errors.OpPrereqError on its own:
14224 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14226 if self.op.target_groups:
14227 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14228 self.op.target_groups)
14230 self.req_target_uuids = []
14232 if self.group_uuid in self.req_target_uuids:
14233 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14234 " as a target group (targets are %s)" %
14235 (self.group_uuid,
14236 utils.CommaJoin(self.req_target_uuids)),
14237 errors.ECODE_INVAL)
14239 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14241 self.share_locks = _ShareAll()
14242 self.needed_locks = {
14243 locking.LEVEL_INSTANCE: [],
14244 locking.LEVEL_NODEGROUP: [],
14245 locking.LEVEL_NODE: [],
14248 def DeclareLocks(self, level):
14249 if level == locking.LEVEL_INSTANCE:
14250 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14252 # Lock instances optimistically, needs verification once node and group
14253 # locks have been acquired
14254 self.needed_locks[locking.LEVEL_INSTANCE] = \
14255 self.cfg.GetNodeGroupInstances(self.group_uuid)
14257 elif level == locking.LEVEL_NODEGROUP:
14258 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14260 if self.req_target_uuids:
14261 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14263 # Lock all groups used by instances optimistically; this requires going
14264 # via the node before it's locked, requiring verification later on
14265 lock_groups.update(group_uuid
14266 for instance_name in
14267 self.owned_locks(locking.LEVEL_INSTANCE)
14268 for group_uuid in
14269 self.cfg.GetInstanceNodeGroups(instance_name))
14270 else:
14271 # No target groups, need to lock all of them
14272 lock_groups = locking.ALL_SET
14274 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14276 elif level == locking.LEVEL_NODE:
14277 # This will only lock the nodes in the group to be evacuated which
14278 # contain actual instances
14279 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14280 self._LockInstancesNodes()
14282 # Lock all nodes in group to be evacuated and target groups
14283 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14284 assert self.group_uuid in owned_groups
14285 member_nodes = [node_name
14286 for group in owned_groups
14287 for node_name in self.cfg.GetNodeGroup(group).members]
14288 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14290 def CheckPrereq(self):
14291 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14292 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14293 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14295 assert owned_groups.issuperset(self.req_target_uuids)
14296 assert self.group_uuid in owned_groups
14298 # Check if locked instances are still correct
14299 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14301 # Get instance information
14302 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14304 # Check if node groups for locked instances are still correct
14305 _CheckInstancesNodeGroups(self.cfg, self.instances,
14306 owned_groups, owned_nodes, self.group_uuid)
14308 if self.req_target_uuids:
14309 # User requested specific target groups
14310 self.target_uuids = self.req_target_uuids
14312 # All groups except the one to be evacuated are potential targets
14313 self.target_uuids = [group_uuid for group_uuid in owned_groups
14314 if group_uuid != self.group_uuid]
14316 if not self.target_uuids:
14317 raise errors.OpPrereqError("There are no possible target groups",
14318 errors.ECODE_INVAL)
14320 def BuildHooksEnv(self):
14321 """Build hooks env.
14324 return {
14325 "GROUP_NAME": self.op.group_name,
14326 "TARGET_GROUPS": " ".join(self.target_uuids),
14327 }
14329 def BuildHooksNodes(self):
14330 """Build hooks nodes.
14333 mn = self.cfg.GetMasterNode()
14335 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14337 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14339 return (run_nodes, run_nodes)
14341 def Exec(self, feedback_fn):
14342 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14344 assert self.group_uuid not in self.target_uuids
14346 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14347 instances=instances, target_groups=self.target_uuids)
14349 ial.Run(self.op.iallocator)
14351 if not ial.success:
14352 raise errors.OpPrereqError("Can't compute group evacuation using"
14353 " iallocator '%s': %s" %
14354 (self.op.iallocator, ial.info),
14355 errors.ECODE_NORES)
14357 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14359 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14360 len(jobs), self.op.group_name)
14362 return ResultWithJobs(jobs)
14365 class TagsLU(NoHooksLU): # pylint: disable=W0223
14366 """Generic tags LU.
14368 This is an abstract class which is the parent of all the other tags LUs.
14371 def ExpandNames(self):
14372 self.group_uuid = None
14373 self.needed_locks = {}
14375 if self.op.kind == constants.TAG_NODE:
14376 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14377 lock_level = locking.LEVEL_NODE
14378 lock_name = self.op.name
14379 elif self.op.kind == constants.TAG_INSTANCE:
14380 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14381 lock_level = locking.LEVEL_INSTANCE
14382 lock_name = self.op.name
14383 elif self.op.kind == constants.TAG_NODEGROUP:
14384 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14385 lock_level = locking.LEVEL_NODEGROUP
14386 lock_name = self.group_uuid
14387 else:
14388 lock_level = None
14389 lock_name = None
14391 if lock_level and getattr(self.op, "use_locking", True):
14392 self.needed_locks[lock_level] = lock_name
14394 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14395 # not possible to acquire the BGL based on opcode parameters)
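# CheckPrereq below resolves self.target to the cluster, node, instance or
# node group object whose tags the concrete tag LUs then read or modify.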
14397 def CheckPrereq(self):
14398 """Check prerequisites.
14401 if self.op.kind == constants.TAG_CLUSTER:
14402 self.target = self.cfg.GetClusterInfo()
14403 elif self.op.kind == constants.TAG_NODE:
14404 self.target = self.cfg.GetNodeInfo(self.op.name)
14405 elif self.op.kind == constants.TAG_INSTANCE:
14406 self.target = self.cfg.GetInstanceInfo(self.op.name)
14407 elif self.op.kind == constants.TAG_NODEGROUP:
14408 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14409 else:
14410 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14411 str(self.op.kind), errors.ECODE_INVAL)
14414 class LUTagsGet(TagsLU):
14415 """Returns the tags of a given object.
14420 def ExpandNames(self):
14421 TagsLU.ExpandNames(self)
14423 # Share locks as this is only a read operation
14424 self.share_locks = _ShareAll()
14426 def Exec(self, feedback_fn):
14427 """Returns the tag list.
14430 return list(self.target.GetTags())
14433 class LUTagsSearch(NoHooksLU):
14434 """Searches the tags for a given pattern.
14439 def ExpandNames(self):
14440 self.needed_locks = {}
14442 def CheckPrereq(self):
14443 """Check prerequisites.
14445 This checks the pattern passed for validity by compiling it.
14448 try:
14449 self.re = re.compile(self.op.pattern)
14450 except re.error, err:
14451 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14452 (self.op.pattern, err), errors.ECODE_INVAL)
14454 def Exec(self, feedback_fn):
14455 """Returns the tag list.
14458 cfg = self.cfg
14459 tgts = [("/cluster", cfg.GetClusterInfo())]
14460 ilist = cfg.GetAllInstancesInfo().values()
14461 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14462 nlist = cfg.GetAllNodesInfo().values()
14463 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14464 tgts.extend(("/nodegroup/%s" % n.name, n)
14465 for n in cfg.GetAllNodeGroupsInfo().values())
14466 results = []
14467 for path, target in tgts:
14468 for tag in target.GetTags():
14469 if self.re.search(tag):
14470 results.append((path, tag))
14472 return results
14474 class LUTagsSet(TagsLU):
14475 """Sets a tag on a given object.
14480 def CheckPrereq(self):
14481 """Check prerequisites.
14483 This checks the type and length of the tag name and value.
14486 TagsLU.CheckPrereq(self)
14487 for tag in self.op.tags:
14488 objects.TaggableObject.ValidateTag(tag)
14490 def Exec(self, feedback_fn):
14494 try:
14495 for tag in self.op.tags:
14496 self.target.AddTag(tag)
14497 except errors.TagError, err:
14498 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14499 self.cfg.Update(self.target, feedback_fn)
14502 class LUTagsDel(TagsLU):
14503 """Delete a list of tags from a given object.
14508 def CheckPrereq(self):
14509 """Check prerequisites.
14511 This checks that we have the given tag.
14514 TagsLU.CheckPrereq(self)
14515 for tag in self.op.tags:
14516 objects.TaggableObject.ValidateTag(tag)
14517 del_tags = frozenset(self.op.tags)
14518 cur_tags = self.target.GetTags()
14520 diff_tags = del_tags - cur_tags
14521 if diff_tags:
14522 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14523 raise errors.OpPrereqError("Tag(s) %s not found" %
14524 (utils.CommaJoin(diff_names), ),
14525 errors.ECODE_NOENT)
14527 def Exec(self, feedback_fn):
14528 """Remove the tag from the object.
14531 for tag in self.op.tags:
14532 self.target.RemoveTag(tag)
14533 self.cfg.Update(self.target, feedback_fn)
14536 class LUTestDelay(NoHooksLU):
14537 """Sleep for a specified amount of time.
14539 This LU sleeps on the master and/or nodes for a specified amount of
14545 def ExpandNames(self):
14546 """Expand names and set required locks.
14548 This expands the node list, if any.
14551 self.needed_locks = {}
14552 if self.op.on_nodes:
14553 # _GetWantedNodes can be used here, but is not always appropriate to use
14554 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14555 # more information.
14556 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14557 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
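# _TestDelay sleeps on the master via utils.TestDelay and/or, for on_nodes,
# through the test_delay RPC on each of the requested nodes.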
14559 def _TestDelay(self):
14560 """Do the actual sleep.
14563 if self.op.on_master:
14564 if not utils.TestDelay(self.op.duration):
14565 raise errors.OpExecError("Error during master delay test")
14566 if self.op.on_nodes:
14567 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14568 for node, node_result in result.items():
14569 node_result.Raise("Failure during rpc call to node %s" % node)
14571 def Exec(self, feedback_fn):
14572 """Execute the test delay opcode, with the wanted repetitions.
14575 if self.op.repeat == 0:
14576 self._TestDelay()
14577 else:
14578 top_value = self.op.repeat - 1
14579 for i in range(self.op.repeat):
14580 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14581 self._TestDelay()
14584 class LUTestJqueue(NoHooksLU):
14585 """Utility LU to test some aspects of the job queue.
14590 # Must be lower than default timeout for WaitForJobChange to see whether it
14591 # notices changed jobs
14592 _CLIENT_CONNECT_TIMEOUT = 20.0
14593 _CLIENT_CONFIRM_TIMEOUT = 60.0
14596 def _NotifyUsingSocket(cls, cb, errcls):
14597 """Opens a Unix socket and waits for another program to connect.
14600 @param cb: Callback to send socket name to client
14601 @type errcls: class
14602 @param errcls: Exception class to use for errors
14605 # Using a temporary directory as there's no easy way to create temporary
14606 # sockets without writing a custom loop around tempfile.mktemp and
14607 # socket.bind
14608 tmpdir = tempfile.mkdtemp()
14609 try:
14610 tmpsock = utils.PathJoin(tmpdir, "sock")
14612 logging.debug("Creating temporary socket at %s", tmpsock)
14613 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14614 try:
14615 sock.bind(tmpsock)
14616 sock.listen(1)
14618 # Send details to client
14619 cb(tmpsock)
14621 # Wait for client to connect before continuing
14622 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14623 try:
14624 (conn, _) = sock.accept()
14625 except socket.error, err:
14626 raise errcls("Client didn't connect in time (%s)" % err)
14627 finally:
14628 sock.close()
14629 finally:
14630 # Remove as soon as client is connected
14631 shutil.rmtree(tmpdir)
14633 # Wait for client to close
14634 try:
14635 try:
14636 # pylint: disable=E1101
14637 # Instance of '_socketobject' has no ... member
14638 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14639 conn.recv(1)
14640 except socket.error, err:
14641 raise errcls("Client failed to confirm notification (%s)" % err)
14642 finally:
14643 conn.close()
14645 def _SendNotification(self, test, arg, sockname):
14646 """Sends a notification to the client.
14649 @param test: Test name
14650 @param arg: Test argument (depends on test)
14651 @type sockname: string
14652 @param sockname: Socket path
14655 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14657 def _Notify(self, prereq, test, arg):
14658 """Notifies the client of a test.
14661 @param prereq: Whether this is a prereq-phase test
14663 @param test: Test name
14664 @param arg: Test argument (depends on test)
14667 if prereq:
14668 errcls = errors.OpPrereqError
14669 else:
14670 errcls = errors.OpExecError
14672 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14673 test, arg),
14674 errcls)
14676 def CheckArguments(self):
14677 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14678 self.expandnames_calls = 0
14680 def ExpandNames(self):
14681 checkargs_calls = getattr(self, "checkargs_calls", 0)
14682 if checkargs_calls < 1:
14683 raise errors.ProgrammerError("CheckArguments was not called")
14685 self.expandnames_calls += 1
14687 if self.op.notify_waitlock:
14688 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14690 self.LogInfo("Expanding names")
14692 # Get lock on master node (just to get a lock, not for a particular reason)
14693 self.needed_locks = {
14694 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14695 }
14697 def Exec(self, feedback_fn):
14698 if self.expandnames_calls < 1:
14699 raise errors.ProgrammerError("ExpandNames was not called")
14701 if self.op.notify_exec:
14702 self._Notify(False, constants.JQT_EXEC, None)
14704 self.LogInfo("Executing")
14706 if self.op.log_messages:
14707 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14708 for idx, msg in enumerate(self.op.log_messages):
14709 self.LogInfo("Sending log message %s", idx + 1)
14710 feedback_fn(constants.JQT_MSGPREFIX + msg)
14711 # Report how many test messages have been sent
14712 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14714 if self.op.fail:
14715 raise errors.OpExecError("Opcode failure was requested")
14717 return True
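# Editorial note: a hedged sketch of the client side of the notification
# handshake used by LUTestJqueue._NotifyUsingSocket above. The LU advertises
# a Unix socket path through the job-queue test log message, waits for a
# connect, and then waits for the connection to be confirmed/closed. How the
# client learns "sockname" depends on the test harness; this is illustrative.
def _ExampleConfirmJqueueNotification(sockname):
  """Connect to a LUTestJqueue notification socket and confirm receipt."""
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)  # unblocks sock.accept() on the LU side
  finally:
    client.close()            # conn.recv(1) on the LU side then returns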
14720 class IAllocator(object):
14721 """IAllocator framework.
14723 An IAllocator instance has four sets of attributes:
14724 - cfg that is needed to query the cluster
14725 - input data (all members of the _KEYS class attribute are required)
14726 - four buffer attributes (in|out_data|text), that represent the
14727 input (to the external script) in text and data structure format,
14728 and the output from it, again in two formats
14729 - the result variables from the script (success, info, nodes) for
14730 easy usage
14733 # pylint: disable=R0902
14734 # lots of instance attributes
14736 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14737 self.cfg = cfg
14738 self.rpc = rpc_runner
14739 # init buffer variables
14740 self.in_text = self.out_text = self.in_data = self.out_data = None
14741 # init all input fields so that pylint is happy
14742 self.mode = mode
14743 self.memory = self.disks = self.disk_template = self.spindle_use = None
14744 self.os = self.tags = self.nics = self.vcpus = None
14745 self.hypervisor = None
14746 self.relocate_from = None
14747 self.name = None
14748 self.instances = None
14749 self.evac_mode = None
14750 self.target_groups = []
14752 self.required_nodes = None
14753 # init result fields
14754 self.success = self.info = self.result = None
14756 try:
14757 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14758 except KeyError:
14759 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14760 " IAllocator" % self.mode)
14762 keyset = [n for (n, _) in keydata]
14764 for key in kwargs:
14765 if key not in keyset:
14766 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14767 " IAllocator" % key)
14768 setattr(self, key, kwargs[key])
14770 for key in keyset:
14771 if key not in kwargs:
14772 raise errors.ProgrammerError("Missing input parameter '%s' to"
14773 " IAllocator" % key)
14774 self._BuildInputData(compat.partial(fn, self), keydata)
14776 def _ComputeClusterData(self):
14777 """Compute the generic allocator input data.
14779 This is the data that is independent of the actual operation.
14782 cfg = self.cfg
14783 cluster_info = cfg.GetClusterInfo()
14785 data = {
14786 "version": constants.IALLOCATOR_VERSION,
14787 "cluster_name": cfg.GetClusterName(),
14788 "cluster_tags": list(cluster_info.GetTags()),
14789 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14790 "ipolicy": cluster_info.ipolicy,
14791 }
14792 ninfo = cfg.GetAllNodesInfo()
14793 iinfo = cfg.GetAllInstancesInfo().values()
14794 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14797 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14799 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14800 hypervisor_name = self.hypervisor
14801 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14802 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14803 else:
14804 hypervisor_name = cluster_info.primary_hypervisor
14806 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14807 [hypervisor_name])
14808 node_iinfo = \
14809 self.rpc.call_all_instances_info(node_list,
14810 cluster_info.enabled_hypervisors)
14812 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14814 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14815 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14816 i_list, config_ndata)
14817 assert len(data["nodes"]) == len(ninfo), \
14818 "Incomplete node data computed"
14820 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14822 self.in_data = data
14824 @staticmethod
14825 def _ComputeNodeGroupData(cfg):
14826 """Compute node groups data.
14829 cluster = cfg.GetClusterInfo()
14830 ng = dict((guuid, {
14831 "name": gdata.name,
14832 "alloc_policy": gdata.alloc_policy,
14833 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14840 def _ComputeBasicNodeData(cfg, node_cfg):
14841 """Compute global node data.
14844 @returns: a dict of name: (node dict, node config)
14847 # fill in static (config-based) values
14848 node_results = dict((ninfo.name, {
14849 "tags": list(ninfo.GetTags()),
14850 "primary_ip": ninfo.primary_ip,
14851 "secondary_ip": ninfo.secondary_ip,
14852 "offline": ninfo.offline,
14853 "drained": ninfo.drained,
14854 "master_candidate": ninfo.master_candidate,
14855 "group": ninfo.group,
14856 "master_capable": ninfo.master_capable,
14857 "vm_capable": ninfo.vm_capable,
14858 "ndparams": cfg.GetNdParams(ninfo),
14860 for ninfo in node_cfg.values())
14862 return node_results
14865 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14867 """Compute global node data.
14869 @param node_results: the basic node structures as filled from the config
14872 #TODO(dynmem): compute the right data on MAX and MIN memory
14873 # make a copy of the current dict
14874 node_results = dict(node_results)
14875 for nname, nresult in node_data.items():
14876 assert nname in node_results, "Missing basic data for node %s" % nname
14877 ninfo = node_cfg[nname]
14879 if not (ninfo.offline or ninfo.drained):
14880 nresult.Raise("Can't get data for node %s" % nname)
14881 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14882 nname)
14883 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14885 for attr in ["memory_total", "memory_free", "memory_dom0",
14886 "vg_size", "vg_free", "cpu_total"]:
14887 if attr not in remote_info:
14888 raise errors.OpExecError("Node '%s' didn't return attribute"
14889 " '%s'" % (nname, attr))
14890 if not isinstance(remote_info[attr], int):
14891 raise errors.OpExecError("Node '%s' returned invalid value"
14893 (nname, attr, remote_info[attr]))
14894 # compute memory used by primary instances
14895 i_p_mem = i_p_up_mem = 0
14896 for iinfo, beinfo in i_list:
14897 if iinfo.primary_node == nname:
14898 i_p_mem += beinfo[constants.BE_MAXMEM]
14899 if iinfo.name not in node_iinfo[nname].payload:
14900 i_used_mem = 0
14901 else:
14902 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14903 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14904 remote_info["memory_free"] -= max(0, i_mem_diff)
14906 if iinfo.admin_state == constants.ADMINST_UP:
14907 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14909 # compute memory used by instances
14911 "total_memory": remote_info["memory_total"],
14912 "reserved_memory": remote_info["memory_dom0"],
14913 "free_memory": remote_info["memory_free"],
14914 "total_disk": remote_info["vg_size"],
14915 "free_disk": remote_info["vg_free"],
14916 "total_cpus": remote_info["cpu_total"],
14917 "i_pri_memory": i_p_mem,
14918 "i_pri_up_memory": i_p_up_mem,
14919 }
14920 pnr_dyn.update(node_results[nname])
14921 node_results[nname] = pnr_dyn
14923 return node_results
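# Editorial note: for orientation, a node entry produced by the method above
# roughly looks like the following (values and units are illustrative only;
# the static keys from _ComputeBasicNodeData are merged in as well):
#
#   {"total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#    "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#    "i_pri_memory": 4096, "i_pri_up_memory": 2048,
#    # ...plus "tags", "primary_ip", "offline", "group", "ndparams", ...
#    }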
14925 @staticmethod
14926 def _ComputeInstanceData(cluster_info, i_list):
14927 """Compute global instance data.
14930 instance_data = {}
14931 for iinfo, beinfo in i_list:
14932 nic_data = []
14933 for nic in iinfo.nics:
14934 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14935 nic_dict = {
14936 "mac": nic.mac,
14937 "ip": nic.ip,
14938 "mode": filled_params[constants.NIC_MODE],
14939 "link": filled_params[constants.NIC_LINK],
14940 }
14941 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14942 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14943 nic_data.append(nic_dict)
14945 "tags": list(iinfo.GetTags()),
14946 "admin_state": iinfo.admin_state,
14947 "vcpus": beinfo[constants.BE_VCPUS],
14948 "memory": beinfo[constants.BE_MAXMEM],
14949 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14951 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14953 "disks": [{constants.IDISK_SIZE: dsk.size,
14954 constants.IDISK_MODE: dsk.mode}
14955 for dsk in iinfo.disks],
14956 "disk_template": iinfo.disk_template,
14957 "hypervisor": iinfo.hypervisor,
14959 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14961 instance_data[iinfo.name] = pir
14963 return instance_data
14965 def _AddNewInstance(self):
14966 """Add new instance data to allocator structure.
14968 This in combination with _AllocatorGetClusterData will create the
14969 correct structure needed as input for the allocator.
14971 The checks for the completeness of the opcode must have already been
14972 done.
14975 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14977 if self.disk_template in constants.DTS_INT_MIRROR:
14978 self.required_nodes = 2
14979 else:
14980 self.required_nodes = 1
14982 request = {
14983 "name": self.name,
14984 "disk_template": self.disk_template,
14985 "tags": self.tags,
14986 "os": self.os,
14987 "vcpus": self.vcpus,
14988 "memory": self.memory,
14989 "spindle_use": self.spindle_use,
14990 "disks": self.disks,
14991 "disk_space_total": disk_space,
14993 "required_nodes": self.required_nodes,
14994 "hypervisor": self.hypervisor,
14999 def _AddRelocateInstance(self):
15000 """Add relocate instance data to allocator structure.
15002 This in combination with _IAllocatorGetClusterData will create the
15003 correct structure needed as input for the allocator.
15005 The checks for the completeness of the opcode must have already been
15006 done.
15009 instance = self.cfg.GetInstanceInfo(self.name)
15010 if instance is None:
15011 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15012 " IAllocator" % self.name)
15014 if instance.disk_template not in constants.DTS_MIRRORED:
15015 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15016 errors.ECODE_INVAL)
15018 if instance.disk_template in constants.DTS_INT_MIRROR and \
15019 len(instance.secondary_nodes) != 1:
15020 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15021 errors.ECODE_STATE)
15023 self.required_nodes = 1
15024 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15025 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15027 request = {
15028 "name": self.name,
15029 "disk_space_total": disk_space,
15030 "required_nodes": self.required_nodes,
15031 "relocate_from": self.relocate_from,
15035 def _AddNodeEvacuate(self):
15036 """Get data for node-evacuate requests.
15040 "instances": self.instances,
15041 "evac_mode": self.evac_mode,
15044 def _AddChangeGroup(self):
15045 """Get data for node-evacuate requests.
15049 "instances": self.instances,
15050 "target_groups": self.target_groups,
15053 def _BuildInputData(self, fn, keydata):
15054 """Build input data structures.
15057 self._ComputeClusterData()
15060 request["type"] = self.mode
15061 for keyname, keytype in keydata:
15062 if keyname not in request:
15063 raise errors.ProgrammerError("Request parameter %s is missing" %
15065 val = request[keyname]
15066 if not keytype(val):
15067 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15068 " validation, value %s, expected"
15069 " type %s" % (keyname, val, keytype))
15070 self.in_data["request"] = request
15072 self.in_text = serializer.Dump(self.in_data)
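# Editorial note: a hedged sketch of what self.in_text looks like for an
# allocation request once _BuildInputData has run. The "request" keys are the
# ones listed for IALLOCATOR_MODE_ALLOC in _MODE_DATA below; the surrounding
# cluster/node/instance data computed above is elided and all values are
# illustrative.
#
#   {"version": ..., "cluster_name": "...", "cluster_tags": [...],
#    "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 1024, "vcpus": 1, "spindle_use": 1,
#                "disks": [...], "disk_template": "drbd",
#                "disk_space_total": ..., "os": "...", "tags": [],
#                "nics": [...], "hypervisor": "...", "required_nodes": 2}}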
15074 _STRING_LIST = ht.TListOf(ht.TString)
15075 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15076 # pylint: disable=E1101
15077 # Class '...' has no 'OP_ID' member
15078 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15079 opcodes.OpInstanceMigrate.OP_ID,
15080 opcodes.OpInstanceReplaceDisks.OP_ID])
15081 })))
15083 _NEVAC_MOVED = \
15084 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15085 ht.TItems([ht.TNonEmptyString,
15086 ht.TNonEmptyString,
15087 ht.TListOf(ht.TNonEmptyString),
15088 ])))
15089 _NEVAC_FAILED = \
15090 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15091 ht.TItems([ht.TNonEmptyString,
15093 ])))
15094 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15095 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15097 _MODE_DATA = {
15098 constants.IALLOCATOR_MODE_ALLOC:
15099 (_AddNewInstance,
15100 [
15101 ("name", ht.TString),
15102 ("memory", ht.TInt),
15103 ("spindle_use", ht.TInt),
15104 ("disks", ht.TListOf(ht.TDict)),
15105 ("disk_template", ht.TString),
15106 ("os", ht.TString),
15107 ("tags", _STRING_LIST),
15108 ("nics", ht.TListOf(ht.TDict)),
15109 ("vcpus", ht.TInt),
15110 ("hypervisor", ht.TString),
15111 ], ht.TList),
15112 constants.IALLOCATOR_MODE_RELOC:
15113 (_AddRelocateInstance,
15114 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15115 ht.TList),
15116 constants.IALLOCATOR_MODE_NODE_EVAC:
15117 (_AddNodeEvacuate, [
15118 ("instances", _STRING_LIST),
15119 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15120 ], _NEVAC_RESULT),
15121 constants.IALLOCATOR_MODE_CHG_GROUP:
15122 (_AddChangeGroup, [
15123 ("instances", _STRING_LIST),
15124 ("target_groups", _STRING_LIST),
15128 def Run(self, name, validate=True, call_fn=None):
15129 """Run an instance allocator and return the results.
15132 if call_fn is None:
15133 call_fn = self.rpc.call_iallocator_runner
15135 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15136 result.Raise("Failure while running the iallocator script")
15138 self.out_text = result.payload
15139 if validate:
15140 self._ValidateResult()
15142 def _ValidateResult(self):
15143 """Process the allocator results.
15145 This will process and if successful save the result in
15146 self.out_data and the other parameters.
15149 try:
15150 rdict = serializer.Load(self.out_text)
15151 except Exception, err:
15152 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15154 if not isinstance(rdict, dict):
15155 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15157 # TODO: remove backwards compatibility in later versions
15158 if "nodes" in rdict and "result" not in rdict:
15159 rdict["result"] = rdict["nodes"]
15160 del rdict["nodes"]
15162 for key in "success", "info", "result":
15163 if key not in rdict:
15164 raise errors.OpExecError("Can't parse iallocator results:"
15165 " missing key '%s'" % key)
15166 setattr(self, key, rdict[key])
15168 if not self._result_check(self.result):
15169 raise errors.OpExecError("Iallocator returned invalid result,"
15170 " expected %s, got %s" %
15171 (self._result_check, self.result),
15172 errors.ECODE_INVAL)
15174 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15175 assert self.relocate_from is not None
15176 assert self.required_nodes == 1
15178 node2group = dict((name, ndata["group"])
15179 for (name, ndata) in self.in_data["nodes"].items())
15181 fn = compat.partial(self._NodesToGroups, node2group,
15182 self.in_data["nodegroups"])
15184 instance = self.cfg.GetInstanceInfo(self.name)
15185 request_groups = fn(self.relocate_from + [instance.primary_node])
15186 result_groups = fn(rdict["result"] + [instance.primary_node])
15188 if self.success and not set(result_groups).issubset(request_groups):
15189 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15190 " differ from original groups (%s)" %
15191 (utils.CommaJoin(result_groups),
15192 utils.CommaJoin(request_groups)))
15194 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15195 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15197 self.out_data = rdict
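# Editorial note: a hedged sketch of the minimal payload an iallocator script
# has to emit to pass the checks above: a JSON object with at least the
# "success", "info" and "result" keys, where "result" also satisfies the
# per-mode _result_check from _MODE_DATA (e.g. a node-name list for an
# allocation). The legacy "nodes" key is still accepted and renamed to
# "result" above. Values below are illustrative.
#
#   {"success": true,
#    "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}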
15199 @staticmethod
15200 def _NodesToGroups(node2group, groups, nodes):
15201 """Returns a list of unique group names for a list of nodes.
15203 @type node2group: dict
15204 @param node2group: Map from node name to group UUID
15206 @param groups: Group information
15208 @param nodes: Node names
15211 result = set()
15213 for node in nodes:
15214 try:
15215 group_uuid = node2group[node]
15216 except KeyError:
15217 # Ignore unknown node
15218 pass
15219 else:
15220 try:
15221 group = groups[group_uuid]
15222 except KeyError:
15223 # Can't find group, let's use UUID
15224 group_name = group_uuid
15225 else:
15226 group_name = group["name"]
15228 result.add(group_name)
15230 return sorted(result)
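# Editorial note: a hedged construction sketch for the class above. The
# keyword arguments accepted per mode are exactly the key names listed in
# _MODE_DATA (for relocation: "name" and "relocate_from"); "cfg" and
# "rpc_runner" stand for the live configuration and RPC runner an LU has at
# hand, and the instance/node names are illustrative.
def _ExampleRunRelocationAllocator(cfg, rpc_runner):
  """Build and run an IAllocator relocation request (illustrative)."""
  ial = IAllocator(cfg, rpc_runner,
                   mode=constants.IALLOCATOR_MODE_RELOC,
                   name="inst1.example.com",
                   relocate_from=["node2.example.com"])
  # "hail" is the usual htools allocator; any installed script name works
  ial.Run("hail")
  if not ial.success:
    raise errors.OpExecError("Relocation request failed: %s" % ial.info)
  return ial.result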
15233 class LUTestAllocator(NoHooksLU):
15234 """Run allocator tests.
15236 This LU runs the allocator tests
15239 def CheckPrereq(self):
15240 """Check prerequisites.
15242 This checks the opcode parameters depending on the test direction and mode.
15245 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15246 for attr in ["memory", "disks", "disk_template",
15247 "os", "tags", "nics", "vcpus"]:
15248 if not hasattr(self.op, attr):
15249 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15250 attr, errors.ECODE_INVAL)
15251 iname = self.cfg.ExpandInstanceName(self.op.name)
15252 if iname is not None:
15253 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15254 iname, errors.ECODE_EXISTS)
15255 if not isinstance(self.op.nics, list):
15256 raise errors.OpPrereqError("Invalid parameter 'nics'",
15257 errors.ECODE_INVAL)
15258 if not isinstance(self.op.disks, list):
15259 raise errors.OpPrereqError("Invalid parameter 'disks'",
15260 errors.ECODE_INVAL)
15261 for row in self.op.disks:
15262 if (not isinstance(row, dict) or
15263 constants.IDISK_SIZE not in row or
15264 not isinstance(row[constants.IDISK_SIZE], int) or
15265 constants.IDISK_MODE not in row or
15266 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15267 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15268 " parameter", errors.ECODE_INVAL)
15269 if self.op.hypervisor is None:
15270 self.op.hypervisor = self.cfg.GetHypervisorType()
15271 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15272 fname = _ExpandInstanceName(self.cfg, self.op.name)
15273 self.op.name = fname
15274 self.relocate_from = \
15275 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15276 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15277 constants.IALLOCATOR_MODE_NODE_EVAC):
15278 if not self.op.instances:
15279 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15280 self.op.instances = _GetWantedInstances(self, self.op.instances)
15282 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15283 self.op.mode, errors.ECODE_INVAL)
15285 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15286 if self.op.allocator is None:
15287 raise errors.OpPrereqError("Missing allocator name",
15288 errors.ECODE_INVAL)
15289 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15290 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15291 self.op.direction, errors.ECODE_INVAL)
15293 def Exec(self, feedback_fn):
15294 """Run the allocator test.
15297 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15298 ial = IAllocator(self.cfg, self.rpc,
15299 mode=self.op.mode,
15300 name=self.op.name,
15301 memory=self.op.memory,
15302 disks=self.op.disks,
15303 disk_template=self.op.disk_template,
15304 os=self.op.os,
15305 tags=self.op.tags,
15306 nics=self.op.nics,
15307 vcpus=self.op.vcpus,
15308 hypervisor=self.op.hypervisor,
15309 spindle_use=self.op.spindle_use,
15310 )
15311 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15312 ial = IAllocator(self.cfg, self.rpc,
15313 mode=self.op.mode,
15314 name=self.op.name,
15315 relocate_from=list(self.relocate_from),
15316 )
15317 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15318 ial = IAllocator(self.cfg, self.rpc,
15319 mode=self.op.mode,
15320 instances=self.op.instances,
15321 target_groups=self.op.target_groups)
15322 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15323 ial = IAllocator(self.cfg, self.rpc,
15324 mode=self.op.mode,
15325 instances=self.op.instances,
15326 evac_mode=self.op.evac_mode)
15327 else:
15328 raise errors.ProgrammerError("Unhandled mode %s in"
15329 " LUTestAllocator.Exec", self.op.mode)
15331 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15332 result = ial.in_text
15333 else:
15334 ial.Run(self.op.allocator, validate=False)
15335 result = ial.out_text
15337 return result
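# Editorial note: a hedged sketch of driving the LU above. With direction
# IALLOCATOR_DIR_IN it only returns the generated input text; with
# IALLOCATOR_DIR_OUT it also runs the named allocator script. The opcode
# field names mirror the self.op attributes used above and are otherwise
# illustrative.
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="inst1.example.com",
#                                allocator="hail")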
15339 #: Query type implementations
15340 _QUERY_IMPL = {
15341 constants.QR_CLUSTER: _ClusterQuery,
15342 constants.QR_INSTANCE: _InstanceQuery,
15343 constants.QR_NODE: _NodeQuery,
15344 constants.QR_GROUP: _GroupQuery,
15345 constants.QR_OS: _OsQuery,
15346 constants.QR_EXPORT: _ExportQuery,
15347 }
15349 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15352 def _GetQueryImplementation(name):
15353 """Returns the implemtnation for a query type.
15355 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15358 try:
15359 return _QUERY_IMPL[name]
15360 except KeyError:
15361 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15362 errors.ECODE_INVAL)
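# Editorial note: a small illustrative helper showing how the mapping above
# is meant to be used; unknown resource names surface as OpPrereqError rather
# than KeyError, which is what opcode-level callers expect.
def _ExampleResolveNodeQuery():
  """Resolve the query implementation for the node resource (illustrative)."""
  return _GetQueryImplementation(constants.QR_NODE)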