4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import OpenSSL

45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
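# Illustrative sketch (not part of the original module): an LU's Exec can hand
# follow-up work to the job queue by returning ResultWithJobs, e.g.:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]   # second, independent job
#     return ResultWithJobs(jobs, result_key="some value")
#
# mcpu._ProcessResult then submits the jobs and includes their job IDs in the
# opcode result.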
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods no longer need to worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
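# Illustrative sketch (an assumption, not from the original file): a typical
# DeclareLocks implementation defers node-lock calculation until the instance
# locks are already held:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       # Only now do we know which instances were locked in ExpandNames
#       self._LockInstancesNodes(primary_only=True)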
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. If there are no nodes, an empty
320 list should be returned (not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged but any LU can define it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
340 @param feedback_fn: function used to send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # API must be kept, thus we ignore the unused-argument and
347 # could-be-a-function warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
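# Illustrative usage sketch (not part of the original module): an
# instance-level LU would typically call this helper from ExpandNames and
# defer node locking to DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE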
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called in DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklets.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or
488 raise NotImplementedError
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
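# Illustrative sketch: read-only LUs typically combine this with
# locking.ALL_SET to take every lock in shared mode, as LUClusterVerifyConfig
# does further below:
#
#   def ExpandNames(self):
#     self.share_locks = _ShareAll()
#     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)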
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper to the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @return: The annotated disk copies
612 @see: L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups are"
661 " are '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717 @raise errors.ProgrammerError: if the nodes parameter is wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735 @raise errors.OpPrereqError: if the instances parameter is wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @type use_default: boolean
757 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @type use_none: boolean
760 @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
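# Worked example (illustrative, made-up parameter names and values):
#
#   _GetUpdatedParams({"acpi": True, "pae": True},
#                     {"acpi": constants.VALUE_DEFAULT, "kernel_args": "ro"})
#   -> {"pae": True, "kernel_args": "ro"}
#
# "acpi" is removed because VALUE_DEFAULT marks it as 'to be deleted' (the
# higher-level default applies again), while "kernel_args" is simply added.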
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of a instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804 " on the cluster'" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @return: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
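# Illustrative usage sketch (an assumption): callers typically narrow their
# node locks down to the nodes that still matter once those are known:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.target_node])   # drop every other node lock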
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
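# Illustrative shape of the resulting mapping (made-up names):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}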
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 # pylint: disable=W0702
964 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node does not support the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
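# Illustrative usage sketch (an assumption): LUs that must only act on stopped
# instances typically call
#
#   _CheckInstanceState(self, instance, INSTANCE_DOWN,
#                       msg="cannot reinstall")
#
# which raises OpPrereqError unless instance.admin_state is ADMINST_DOWN.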
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None if the value meets the criteria, otherwise an error message
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
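# Worked example (illustrative, made-up policy): with
# ipolicy[constants.ISPECS_MIN]["memory-size"] == 128 and
# ipolicy[constants.ISPECS_MAX]["memory-size"] == 4096, a value of 8192
# produces a message roughly like
# "memory-size value 8192 is not in range [128, 4096]", while 512 yields None.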
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
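# Illustrative call (made-up numbers): checking a 2-vCPU, 512 MB instance with
# one NIC and two 1 GB disks on one spindle:
#
#   _ComputeIPolicySpecViolation(ipolicy, 512, 2, 2, 1, [1024, 1024], 1)
#
# returns a (possibly empty) list of human-readable violation messages.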
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1211 _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meets the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The new node to relocate
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1316 def _BuildNetworkHookEnv(name, network, gateway, network6, gateway6,
1317 network_type, mac_prefix, tags):
1320 env["NETWORK_NAME"] = name
1322 env["NETWORK_SUBNET"] = network
1324 env["NETWORK_GATEWAY"] = gateway
1326 env["NETWORK_SUBNET6"] = network6
1328 env["NETWORK_GATEWAY6"] = gateway6
1330 env["NETWORK_MAC_PREFIX"] = mac_prefix
1332 env["NETWORK_TYPE"] = network_type
1334 env["NETWORK_TAGS"] = " ".join(tags)
1339 def _BuildNetworkHookEnvByObject(lu, network):
1341 "name": network.name,
1342 "network": network.network,
1343 "gateway": network.gateway,
1344 "network6": network.network6,
1345 "gateway6": network.gateway6,
1346 "network_type": network.network_type,
1347 "mac_prefix": network.mac_prefix,
1348 "tags" : network.tags,
1350 return _BuildNetworkHookEnv(**args)
1353 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1354 minmem, maxmem, vcpus, nics, disk_template, disks,
1355 bep, hvp, hypervisor_name, tags):
1356 """Builds instance related env variables for hooks
1358 This builds the hook environment from individual variables.
1361 @param name: the name of the instance
1362 @type primary_node: string
1363 @param primary_node: the name of the instance's primary node
1364 @type secondary_nodes: list
1365 @param secondary_nodes: list of secondary nodes as strings
1366 @type os_type: string
1367 @param os_type: the name of the instance's OS
1368 @type status: string
1369 @param status: the desired status of the instance
1370 @type minmem: string
1371 @param minmem: the minimum memory size of the instance
1372 @type maxmem: string
1373 @param maxmem: the maximum memory size of the instance
1375 @param vcpus: the count of VCPUs the instance has
1377 @param nics: list of tuples (ip, mac, mode, link, network, netinfo) representing
1378 the NICs the instance has
1379 @type disk_template: string
1380 @param disk_template: the disk template of the instance
1382 @param disks: the list of (size, mode) pairs
1384 @param bep: the backend parameters for the instance
1386 @param hvp: the hypervisor parameters for the instance
1387 @type hypervisor_name: string
1388 @param hypervisor_name: the hypervisor for the instance
1390 @param tags: list of instance tags as strings
1392 @return: the hook environment for this instance
1397 "INSTANCE_NAME": name,
1398 "INSTANCE_PRIMARY": primary_node,
1399 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1400 "INSTANCE_OS_TYPE": os_type,
1401 "INSTANCE_STATUS": status,
1402 "INSTANCE_MINMEM": minmem,
1403 "INSTANCE_MAXMEM": maxmem,
1404 # TODO(2.7) remove deprecated "memory" value
1405 "INSTANCE_MEMORY": maxmem,
1406 "INSTANCE_VCPUS": vcpus,
1407 "INSTANCE_DISK_TEMPLATE": disk_template,
1408 "INSTANCE_HYPERVISOR": hypervisor_name,
1411 nic_count = len(nics)
1412 for idx, (ip, mac, mode, link, network, netinfo) in enumerate(nics):
1415 env["INSTANCE_NIC%d_IP" % idx] = ip
1416 env["INSTANCE_NIC%d_MAC" % idx] = mac
1417 env["INSTANCE_NIC%d_MODE" % idx] = mode
1418 env["INSTANCE_NIC%d_LINK" % idx] = link
1420 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1422 nobj = objects.Network.FromDict(netinfo)
1424 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1426 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1428 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1430 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1432 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1433 if nobj.network_type:
1434 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1436 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1437 if mode == constants.NIC_MODE_BRIDGED:
1438 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1442 env["INSTANCE_NIC_COUNT"] = nic_count
1445 disk_count = len(disks)
1446 for idx, (size, mode) in enumerate(disks):
1447 env["INSTANCE_DISK%d_SIZE" % idx] = size
1448 env["INSTANCE_DISK%d_MODE" % idx] = mode
1452 env["INSTANCE_DISK_COUNT"] = disk_count
1457 env["INSTANCE_TAGS"] = " ".join(tags)
1459 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1460 for key, value in source.items():
1461 env["INSTANCE_%s_%s" % (kind, key)] = value
1465 def _NICToTuple(lu, nic):
1466 """Build a tupple of nic information.
1468 @type lu: L{LogicalUnit}
1469 @param lu: the logical unit on whose behalf we execute
1470 @type nic: L{objects.NIC}
1471 @param nic: nic to convert to hooks tuple
1474 cluster = lu.cfg.GetClusterInfo()
1477 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1478 mode = filled_params[constants.NIC_MODE]
1479 link = filled_params[constants.NIC_LINK]
1480 network = nic.network
1483 net_uuid = lu.cfg.LookupNetwork(network)
1485 nobj = lu.cfg.GetNetwork(net_uuid)
1486 netinfo = objects.Network.ToDict(nobj)
1487 return (ip, mac, mode, link, network, netinfo)
1489 def _NICListToTuple(lu, nics):
1490 """Build a list of nic information tuples.
1492 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1493 value in LUInstanceQueryData.
1495 @type lu: L{LogicalUnit}
1496 @param lu: the logical unit on whose behalf we execute
1497 @type nics: list of L{objects.NIC}
1498 @param nics: list of nics to convert to hooks tuples
1502 cluster = lu.cfg.GetClusterInfo()
1504 hooks_nics.append(_NICToTuple(lu, nic))
1507 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1508 """Builds instance related env variables for hooks from an object.
1510 @type lu: L{LogicalUnit}
1511 @param lu: the logical unit on whose behalf we execute
1512 @type instance: L{objects.Instance}
1513 @param instance: the instance for which we should build the
1515 @type override: dict
1516 @param override: dictionary with key/values that will override
1519 @return: the hook environment dictionary
1522 cluster = lu.cfg.GetClusterInfo()
1523 bep = cluster.FillBE(instance)
1524 hvp = cluster.FillHV(instance)
1526 "name": instance.name,
1527 "primary_node": instance.primary_node,
1528 "secondary_nodes": instance.secondary_nodes,
1529 "os_type": instance.os,
1530 "status": instance.admin_state,
1531 "maxmem": bep[constants.BE_MAXMEM],
1532 "minmem": bep[constants.BE_MINMEM],
1533 "vcpus": bep[constants.BE_VCPUS],
1534 "nics": _NICListToTuple(lu, instance.nics),
1535 "disk_template": instance.disk_template,
1536 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1539 "hypervisor_name": instance.hypervisor,
1540 "tags": instance.tags,
1543 args.update(override)
1544 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1547 def _AdjustCandidatePool(lu, exceptions):
1548 """Adjust the candidate pool after node operations.
1551 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1553 lu.LogInfo("Promoted nodes to master candidate role: %s",
1554 utils.CommaJoin(node.name for node in mod_list))
1555 for name in mod_list:
1556 lu.context.ReaddNode(name)
1557 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1559 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1563 def _DecideSelfPromotion(lu, exceptions=None):
1564 """Decide whether I should promote myself as a master candidate.
1567 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1568 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1569 # the new node will increase mc_max by one, so:
1570 mc_should = min(mc_should + 1, cp_size)
1571 return mc_now < mc_should
1574 def _CalculateGroupIPolicy(cluster, group):
1575 """Calculate instance policy for group.
1578 return cluster.SimpleFillIPolicy(group.ipolicy)
1581 def _ComputeViolatingInstances(ipolicy, instances):
1582 """Computes a set of instances who violates given ipolicy.
1584 @param ipolicy: The ipolicy to verify
1585 @type instances: list of L{objects.Instance}
1586 @param instances: List of instances to verify
1587 @return: A frozenset of instance names violating the ipolicy
1590 return frozenset([inst.name for inst in instances
1591 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1594 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1595 """Check that the brigdes needed by a list of nics exist.
1598 cluster = lu.cfg.GetClusterInfo()
1599 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1600 brlist = [params[constants.NIC_LINK] for params in paramslist
1601 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1603 result = lu.rpc.call_bridges_exist(target_node, brlist)
1604 result.Raise("Error checking bridges on destination node '%s'" %
1605 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1608 def _CheckInstanceBridgesExist(lu, instance, node=None):
1609 """Check that the brigdes needed by an instance exist.
1613 node = instance.primary_node
1614 _CheckNicsBridgesExist(lu, instance.nics, node)
1617 def _CheckOSVariant(os_obj, name):
1618 """Check whether an OS name conforms to the os variants specification.
1620 @type os_obj: L{objects.OS}
1621 @param os_obj: OS object to check
1623 @param name: OS name passed by the user, to check for validity
1626 variant = objects.OS.GetVariant(name)
1627 if not os_obj.supported_variants:
1629 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1630 " passed)" % (os_obj.name, variant),
1634 raise errors.OpPrereqError("OS name must include a variant",
1637 if variant not in os_obj.supported_variants:
1638 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1641 def _GetNodeInstancesInner(cfg, fn):
1642 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1645 def _GetNodeInstances(cfg, node_name):
1646 """Returns a list of all primary and secondary instances on a node.
1650 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1653 def _GetNodePrimaryInstances(cfg, node_name):
1654 """Returns primary instances on a node.
1657 return _GetNodeInstancesInner(cfg,
1658 lambda inst: node_name == inst.primary_node)
1661 def _GetNodeSecondaryInstances(cfg, node_name):
1662 """Returns secondary instances on a node.
1665 return _GetNodeInstancesInner(cfg,
1666 lambda inst: node_name in inst.secondary_nodes)
1669 def _GetStorageTypeArgs(cfg, storage_type):
1670 """Returns the arguments for a storage type.
1673 # Special case for file storage
1674 if storage_type == constants.ST_FILE:
1675 # storage.FileStorage wants a list of storage directories
1676 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1681 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1684 for dev in instance.disks:
1685 cfg.SetDiskID(dev, node_name)
1687 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1689 result.Raise("Failed to get disk status from node %s" % node_name,
1690 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1692 for idx, bdev_status in enumerate(result.payload):
1693 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1699 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1700 """Check the sanity of iallocator and node arguments and use the
1701 cluster-wide iallocator if appropriate.
1703 Check that at most one of (iallocator, node) is specified. If none is
1704 specified, then the LU's opcode's iallocator slot is filled with the
1705 cluster-wide default iallocator.
1707 @type iallocator_slot: string
1708 @param iallocator_slot: the name of the opcode iallocator slot
1709 @type node_slot: string
1710 @param node_slot: the name of the opcode target node slot
1713 node = getattr(lu.op, node_slot, None)
1714 iallocator = getattr(lu.op, iallocator_slot, None)
1716 if node is not None and iallocator is not None:
1717 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1719 elif node is None and iallocator is None:
1720 default_iallocator = lu.cfg.GetDefaultIAllocator()
1721 if default_iallocator:
1722 setattr(lu.op, iallocator_slot, default_iallocator)
1724 raise errors.OpPrereqError("No iallocator or node given and no"
1725 " cluster-wide default iallocator found;"
1726 " please specify either an iallocator or a"
1727 " node, or set a cluster-wide default"
1731 def _GetDefaultIAllocator(cfg, iallocator):
1732 """Decides on which iallocator to use.
1734 @type cfg: L{config.ConfigWriter}
1735 @param cfg: Cluster configuration object
1736 @type iallocator: string or None
1737 @param iallocator: Iallocator specified in opcode
1739 @return: Iallocator name
1743 # Use default iallocator
1744 iallocator = cfg.GetDefaultIAllocator()
1747 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1748 " opcode nor as a cluster-wide default",
1754 class LUClusterPostInit(LogicalUnit):
1755 """Logical unit for running hooks after cluster initialization.
1758 HPATH = "cluster-init"
1759 HTYPE = constants.HTYPE_CLUSTER
1761 def BuildHooksEnv(self):
1766 "OP_TARGET": self.cfg.GetClusterName(),
1769 def BuildHooksNodes(self):
1770 """Build hooks nodes.
1773 return ([], [self.cfg.GetMasterNode()])
1775 def Exec(self, feedback_fn):
1782 class LUClusterDestroy(LogicalUnit):
1783 """Logical unit for destroying the cluster.
1786 HPATH = "cluster-destroy"
1787 HTYPE = constants.HTYPE_CLUSTER
1789 def BuildHooksEnv(self):
1794 "OP_TARGET": self.cfg.GetClusterName(),
1797 def BuildHooksNodes(self):
1798 """Build hooks nodes.
1803 def CheckPrereq(self):
1804 """Check prerequisites.
1806 This checks whether the cluster is empty.
1808 Any errors are signaled by raising errors.OpPrereqError.
1811 master = self.cfg.GetMasterNode()
1813 nodelist = self.cfg.GetNodeList()
1814 if len(nodelist) != 1 or nodelist[0] != master:
1815 raise errors.OpPrereqError("There are still %d node(s) in"
1816 " this cluster." % (len(nodelist) - 1),
1818 instancelist = self.cfg.GetInstanceList()
1820 raise errors.OpPrereqError("There are still %d instance(s) in"
1821 " this cluster." % len(instancelist),
1824 def Exec(self, feedback_fn):
1825 """Destroys the cluster.
1828 master_params = self.cfg.GetMasterNetworkParameters()
1830 # Run post hooks on master node before it's removed
1831 _RunPostHook(self, master_params.name)
1833 ems = self.cfg.GetUseExternalMipScript()
1834 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1837 self.LogWarning("Error disabling the master IP address: %s",
1840 return master_params.name
1843 def _VerifyCertificate(filename):
1844 """Verifies a certificate for L{LUClusterVerifyConfig}.
1846 @type filename: string
1847 @param filename: Path to PEM file
1851 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1852 utils.ReadFile(filename))
1853 except Exception, err: # pylint: disable=W0703
1854 return (LUClusterVerifyConfig.ETYPE_ERROR,
1855 "Failed to load X509 certificate %s: %s" % (filename, err))
1858 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1859 constants.SSL_CERT_EXPIRATION_ERROR)
1862 fnamemsg = "While verifying %s: %s" % (filename, msg)
1867 return (None, fnamemsg)
1868 elif errcode == utils.CERT_WARNING:
1869 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1870 elif errcode == utils.CERT_ERROR:
1871 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1873 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1876 def _GetAllHypervisorParameters(cluster, instances):
1877 """Compute the set of all hypervisor parameters.
1879 @type cluster: L{objects.Cluster}
1880 @param cluster: the cluster object
1881 @param instances: list of L{objects.Instance}
1882 @param instances: additional instances from which to obtain parameters
1883 @rtype: list of (origin, hypervisor, parameters)
1884 @return: a list with all parameters found, indicating the hypervisor they
1885 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1890 for hv_name in cluster.enabled_hypervisors:
1891 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1893 for os_name, os_hvp in cluster.os_hvp.items():
1894 for hv_name, hv_params in os_hvp.items():
1896 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1897 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1899 # TODO: collapse identical parameter values in a single one
1900 for instance in instances:
1901 if instance.hvparams:
1902 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1903 cluster.FillHV(instance)))
1908 class _VerifyErrors(object):
1909 """Mix-in for cluster/group verify LUs.
1911 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1912 self.op and self._feedback_fn to be available.)
1916 ETYPE_FIELD = "code"
1917 ETYPE_ERROR = "ERROR"
1918 ETYPE_WARNING = "WARNING"
1920 def _Error(self, ecode, item, msg, *args, **kwargs):
1921 """Format an error message.
1923 Based on the opcode's error_codes parameter, either format a
1924 parseable error code, or a simpler error string.
1926 This must be called only from Exec and functions called from Exec.
1929 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1930 itype, etxt, _ = ecode
1931 # first complete the msg
1934 # then format the whole message
1935 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1936 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1942 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1943 # and finally report it via the feedback_fn
1944 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1946 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1947 """Log an error message if the passed condition is True.
1951 or self.op.debug_simulate_errors) # pylint: disable=E1101
1953 # If the error code is in the list of ignored errors, demote the error to a
1955 (_, etxt, _) = ecode
1956 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1957 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1960 self._Error(ecode, *args, **kwargs)
1962 # do not mark the operation as failed for WARN cases only
1963 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1964 self.bad = self.bad or cond
1967 class LUClusterVerify(NoHooksLU):
1968 """Submits all jobs necessary to verify the cluster.
1973 def ExpandNames(self):
1974 self.needed_locks = {}
1976 def Exec(self, feedback_fn):
1979 if self.op.group_name:
1980 groups = [self.op.group_name]
1981 depends_fn = lambda: None
1983 groups = self.cfg.GetNodeGroupList()
1985 # Verify global configuration
1987 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1990 # Always depend on global verification
1991 depends_fn = lambda: [(-len(jobs), [])]
1993 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1994 ignore_errors=self.op.ignore_errors,
1995 depends=depends_fn())]
1996 for group in groups)
1998 # Fix up all parameters
1999 for op in itertools.chain(*jobs): # pylint: disable=W0142
2000 op.debug_simulate_errors = self.op.debug_simulate_errors
2001 op.verbose = self.op.verbose
2002 op.error_codes = self.op.error_codes
2004 op.skip_checks = self.op.skip_checks
2005 except AttributeError:
2006 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2008 return ResultWithJobs(jobs)
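# Editor's sketch of the submitted jobs (group names hypothetical): when no
# single group is requested the result is roughly
#
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
#
# Since depends_fn() is evaluated while the job list grows, every per-group
# job carries a relative dependency that points back at the global
# configuration check submitted first.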
2011 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2012 """Verifies the cluster config.
2017 def _VerifyHVP(self, hvp_data):
2018 """Verifies locally the syntax of the hypervisor parameters.
2021 for item, hv_name, hv_params in hvp_data:
2022 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2025 hv_class = hypervisor.GetHypervisor(hv_name)
2026 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2027 hv_class.CheckParameterSyntax(hv_params)
2028 except errors.GenericError, err:
2029 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2031 def ExpandNames(self):
2032 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2033 self.share_locks = _ShareAll()
2035 def CheckPrereq(self):
2036 """Check prerequisites.
2039 # Retrieve all information
2040 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2041 self.all_node_info = self.cfg.GetAllNodesInfo()
2042 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2044 def Exec(self, feedback_fn):
2045 """Verify integrity of cluster, performing various test on nodes.
2049 self._feedback_fn = feedback_fn
2051 feedback_fn("* Verifying cluster config")
2053 for msg in self.cfg.VerifyConfig():
2054 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2056 feedback_fn("* Verifying cluster certificate files")
2058 for cert_filename in constants.ALL_CERT_FILES:
2059 (errcode, msg) = _VerifyCertificate(cert_filename)
2060 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2062 feedback_fn("* Verifying hypervisor parameters")
2064 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2065 self.all_inst_info.values()))
2067 feedback_fn("* Verifying all nodes belong to an existing group")
2069 # We do this verification here because, should this bogus circumstance
2070 # occur, it would never be caught by VerifyGroup, which only acts on
2071 # nodes/instances reachable from existing node groups.
2073 dangling_nodes = set(node.name for node in self.all_node_info.values()
2074 if node.group not in self.all_group_info)
2076 dangling_instances = {}
2077 no_node_instances = []
2079 for inst in self.all_inst_info.values():
2080 if inst.primary_node in dangling_nodes:
2081 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2082 elif inst.primary_node not in self.all_node_info:
2083 no_node_instances.append(inst.name)
2088 utils.CommaJoin(dangling_instances.get(node.name,
2090 for node in dangling_nodes]
2092 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2094 "the following nodes (and their instances) belong to a non"
2095 " existing group: %s", utils.CommaJoin(pretty_dangling))
2097 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2099 "the following instances have a non-existing primary-node:"
2100 " %s", utils.CommaJoin(no_node_instances))
2105 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2106 """Verifies the status of a node group.
2109 HPATH = "cluster-verify"
2110 HTYPE = constants.HTYPE_CLUSTER
2113 _HOOKS_INDENT_RE = re.compile("^", re.M)
2115 class NodeImage(object):
2116 """A class representing the logical and physical status of a node.
2119 @ivar name: the node name to which this object refers
2120 @ivar volumes: a structure as returned from
2121 L{ganeti.backend.GetVolumeList} (runtime)
2122 @ivar instances: a list of running instances (runtime)
2123 @ivar pinst: list of configured primary instances (config)
2124 @ivar sinst: list of configured secondary instances (config)
2125 @ivar sbp: dictionary of {primary-node: list of instances} for all
2126 instances for which this node is secondary (config)
2127 @ivar mfree: free memory, as reported by hypervisor (runtime)
2128 @ivar dfree: free disk, as reported by the node (runtime)
2129 @ivar offline: the offline status (config)
2130 @type rpc_fail: boolean
2131 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2132 not whether the individual keys were correct) (runtime)
2133 @type lvm_fail: boolean
2134 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2135 @type hyp_fail: boolean
2136 @ivar hyp_fail: whether the RPC call didn't return the instance list
2137 @type ghost: boolean
2138 @ivar ghost: whether this is a known node or not (config)
2139 @type os_fail: boolean
2140 @ivar os_fail: whether the RPC call didn't return valid OS data
2142 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2143 @type vm_capable: boolean
2144 @ivar vm_capable: whether the node can host instances
2147 def __init__(self, offline=False, name=None, vm_capable=True):
2156 self.offline = offline
2157 self.vm_capable = vm_capable
2158 self.rpc_fail = False
2159 self.lvm_fail = False
2160 self.hyp_fail = False
2162 self.os_fail = False
2165 def ExpandNames(self):
2166 # This raises errors.OpPrereqError on its own:
2167 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2169 # Get instances in node group; this is unsafe and needs verification later
2171 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2173 self.needed_locks = {
2174 locking.LEVEL_INSTANCE: inst_names,
2175 locking.LEVEL_NODEGROUP: [self.group_uuid],
2176 locking.LEVEL_NODE: [],
2179 self.share_locks = _ShareAll()
2181 def DeclareLocks(self, level):
2182 if level == locking.LEVEL_NODE:
2183 # Get members of node group; this is unsafe and needs verification later
2184 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2186 all_inst_info = self.cfg.GetAllInstancesInfo()
2188 # In Exec(), we warn about mirrored instances that have primary and
2189 # secondary living in separate node groups. To fully verify that
2190 # volumes for these instances are healthy, we will need to do an
2191 # extra call to their secondaries. We ensure here those nodes will
2193 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2194 # Important: access only the instances whose lock is owned
2195 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2196 nodes.update(all_inst_info[inst].secondary_nodes)
2198 self.needed_locks[locking.LEVEL_NODE] = nodes
2200 def CheckPrereq(self):
2201 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2202 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2204 group_nodes = set(self.group_info.members)
2206 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2209 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2211 unlocked_instances = \
2212 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2215 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2216 utils.CommaJoin(unlocked_nodes),
2219 if unlocked_instances:
2220 raise errors.OpPrereqError("Missing lock for instances: %s" %
2221 utils.CommaJoin(unlocked_instances),
2224 self.all_node_info = self.cfg.GetAllNodesInfo()
2225 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2227 self.my_node_names = utils.NiceSort(group_nodes)
2228 self.my_inst_names = utils.NiceSort(group_instances)
2230 self.my_node_info = dict((name, self.all_node_info[name])
2231 for name in self.my_node_names)
2233 self.my_inst_info = dict((name, self.all_inst_info[name])
2234 for name in self.my_inst_names)
2236 # We detect here the nodes that will need the extra RPC calls for verifying
2237 # split LV volumes; they should be locked.
2238 extra_lv_nodes = set()
2240 for inst in self.my_inst_info.values():
2241 if inst.disk_template in constants.DTS_INT_MIRROR:
2242 for nname in inst.all_nodes:
2243 if self.all_node_info[nname].group != self.group_uuid:
2244 extra_lv_nodes.add(nname)
2246 unlocked_lv_nodes = \
2247 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2249 if unlocked_lv_nodes:
2250 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2251 utils.CommaJoin(unlocked_lv_nodes),
2253 self.extra_lv_nodes = list(extra_lv_nodes)
2255 def _VerifyNode(self, ninfo, nresult):
2256 """Perform some basic validation on data returned from a node.
2258 - check the result data structure is well formed and has all the
2260 - check ganeti version
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the results from the node
2266 @return: whether overall this call was successful (and we can expect
2267 reasonable values in the response)
2271 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273 # main result, nresult should be a non-empty dict
2274 test = not nresult or not isinstance(nresult, dict)
2275 _ErrorIf(test, constants.CV_ENODERPC, node,
2276 "unable to verify node: no data returned")
2280 # compares ganeti version
2281 local_version = constants.PROTOCOL_VERSION
2282 remote_version = nresult.get("version", None)
2283 test = not (remote_version and
2284 isinstance(remote_version, (list, tuple)) and
2285 len(remote_version) == 2)
2286 _ErrorIf(test, constants.CV_ENODERPC, node,
2287 "connection to node returned invalid data")
2291 test = local_version != remote_version[0]
2292 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2293 "incompatible protocol versions: master %s,"
2294 " node %s", local_version, remote_version[0])
2298 # node seems compatible, we can actually try to look into its results
2300 # full package version
2301 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2302 constants.CV_ENODEVERSION, node,
2303 "software version mismatch: master %s, node %s",
2304 constants.RELEASE_VERSION, remote_version[1],
2305 code=self.ETYPE_WARNING)
2307 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2308 if ninfo.vm_capable and isinstance(hyp_result, dict):
2309 for hv_name, hv_result in hyp_result.iteritems():
2310 test = hv_result is not None
2311 _ErrorIf(test, constants.CV_ENODEHV, node,
2312 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2314 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2315 if ninfo.vm_capable and isinstance(hvp_result, list):
2316 for item, hv_name, hv_result in hvp_result:
2317 _ErrorIf(True, constants.CV_ENODEHV, node,
2318 "hypervisor %s parameter verify failure (source %s): %s",
2319 hv_name, item, hv_result)
2321 test = nresult.get(constants.NV_NODESETUP,
2322 ["Missing NODESETUP results"])
2323 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2328 def _VerifyNodeTime(self, ninfo, nresult,
2329 nvinfo_starttime, nvinfo_endtime):
2330 """Check the node time.
2332 @type ninfo: L{objects.Node}
2333 @param ninfo: the node to check
2334 @param nresult: the remote results for the node
2335 @param nvinfo_starttime: the start time of the RPC call
2336 @param nvinfo_endtime: the end time of the RPC call
2340 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2342 ntime = nresult.get(constants.NV_TIME, None)
2344 ntime_merged = utils.MergeTime(ntime)
2345 except (ValueError, TypeError):
2346 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2349 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2350 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2351 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2352 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2356 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2357 "Node time diverges by at least %s from master node time",
2360 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2361 """Check the node LVM results.
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the remote results for the node
2366 @param vg_name: the configured VG name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 # checks vg existence and size > 20G
2376 vglist = nresult.get(constants.NV_VGLIST, None)
2378 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2380 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2381 constants.MIN_VG_SIZE)
2382 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2385 pvlist = nresult.get(constants.NV_PVLIST, None)
2386 test = pvlist is None
2387 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2389 # check that ':' is not present in PV names, since it's a
2390 # special character for lvcreate (denotes the range of PEs to
2392 for _, pvname, owner_vg in pvlist:
2393 test = ":" in pvname
2394 _ErrorIf(test, constants.CV_ENODELVM, node,
2395 "Invalid character ':' in PV '%s' of VG '%s'",
2398 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2399 """Check the node bridges.
2401 @type ninfo: L{objects.Node}
2402 @param ninfo: the node to check
2403 @param nresult: the remote results for the node
2404 @param bridges: the expected list of bridges
2411 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2413 missing = nresult.get(constants.NV_BRIDGES, None)
2414 test = not isinstance(missing, list)
2415 _ErrorIf(test, constants.CV_ENODENET, node,
2416 "did not return valid bridge information")
2418 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2419 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2421 def _VerifyNodeUserScripts(self, ninfo, nresult):
2422 """Check the results of user scripts presence and executability on the node
2424 @type ninfo: L{objects.Node}
2425 @param ninfo: the node to check
2426 @param nresult: the remote results for the node
2431 test = constants.NV_USERSCRIPTS not in nresult
2432 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2433 "did not return user scripts information")
2435 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2437 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2438 "user scripts not present or not executable: %s" %
2439 utils.CommaJoin(sorted(broken_scripts)))
2441 def _VerifyNodeNetwork(self, ninfo, nresult):
2442 """Check the node network connectivity results.
2444 @type ninfo: L{objects.Node}
2445 @param ninfo: the node to check
2446 @param nresult: the remote results for the node
2450 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452 test = constants.NV_NODELIST not in nresult
2453 _ErrorIf(test, constants.CV_ENODESSH, node,
2454 "node hasn't returned node ssh connectivity data")
2456 if nresult[constants.NV_NODELIST]:
2457 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2458 _ErrorIf(True, constants.CV_ENODESSH, node,
2459 "ssh communication with node '%s': %s", a_node, a_msg)
2461 test = constants.NV_NODENETTEST not in nresult
2462 _ErrorIf(test, constants.CV_ENODENET, node,
2463 "node hasn't returned node tcp connectivity data")
2465 if nresult[constants.NV_NODENETTEST]:
2466 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2468 _ErrorIf(True, constants.CV_ENODENET, node,
2469 "tcp communication with node '%s': %s",
2470 anode, nresult[constants.NV_NODENETTEST][anode])
2472 test = constants.NV_MASTERIP not in nresult
2473 _ErrorIf(test, constants.CV_ENODENET, node,
2474 "node hasn't returned node master IP reachability data")
2476 if not nresult[constants.NV_MASTERIP]:
2477 if node == self.master_node:
2478 msg = "the master node cannot reach the master IP (not configured?)"
2480 msg = "cannot reach the master IP"
2481 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2483 def _VerifyInstance(self, instance, instanceconfig, node_image,
2485 """Verify an instance.
2487 This function checks to see if the required block devices are
2488 available on the instance's node.
2491 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2492 node_current = instanceconfig.primary_node
2494 node_vol_should = {}
2495 instanceconfig.MapLVsByNode(node_vol_should)
2497 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2498 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2499 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2501 for node in node_vol_should:
2502 n_img = node_image[node]
2503 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2504 # ignore missing volumes on offline or broken nodes
2506 for volume in node_vol_should[node]:
2507 test = volume not in n_img.volumes
2508 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2509 "volume %s missing on node %s", volume, node)
2511 if instanceconfig.admin_state == constants.ADMINST_UP:
2512 pri_img = node_image[node_current]
2513 test = instance not in pri_img.instances and not pri_img.offline
2514 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2515 "instance not running on its primary node %s",
2518 diskdata = [(nname, success, status, idx)
2519 for (nname, disks) in diskstatus.items()
2520 for idx, (success, status) in enumerate(disks)]
2522 for nname, success, bdev_status, idx in diskdata:
2523 # the 'ghost node' construction in Exec() ensures that we have a
2525 snode = node_image[nname]
2526 bad_snode = snode.ghost or snode.offline
2527 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2528 not success and not bad_snode,
2529 constants.CV_EINSTANCEFAULTYDISK, instance,
2530 "couldn't retrieve status for disk/%s on %s: %s",
2531 idx, nname, bdev_status)
2532 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2533 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2534 constants.CV_EINSTANCEFAULTYDISK, instance,
2535 "disk/%s on %s is faulty", idx, nname)
2537 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2538 """Verify if there are any unknown volumes in the cluster.
2540 The .os, .swap and backup volumes are ignored. All other volumes are
2541 reported as unknown.
2543 @type reserved: L{ganeti.utils.FieldSet}
2544 @param reserved: a FieldSet of reserved volume names
2547 for node, n_img in node_image.items():
2548 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2549 self.all_node_info[node].group != self.group_uuid):
2550 # skip non-healthy nodes
2552 for volume in n_img.volumes:
2553 test = ((node not in node_vol_should or
2554 volume not in node_vol_should[node]) and
2555 not reserved.Matches(volume))
2556 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2557 "volume %s is unknown", volume)
2559 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2560 """Verify N+1 Memory Resilience.
2562 Check that if one single node dies we can still start all the
2563 instances it was primary for.
2566 cluster_info = self.cfg.GetClusterInfo()
2567 for node, n_img in node_image.items():
2568 # This code checks that every node which is now listed as
2569 # secondary has enough memory to host all instances it is
2570 # supposed to should a single other node in the cluster fail.
2571 # FIXME: not ready for failover to an arbitrary node
2572 # FIXME: does not support file-backed instances
2573 # WARNING: we currently take into account down instances as well
2574 # as up ones, considering that even if they're down someone
2575 # might want to start them even in the event of a node failure.
2576 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2577 # we're skipping nodes marked offline and nodes in other groups from
2578 # the N+1 warning, since most likely we don't have good memory
2579 # information from them; we already list instances living on such
2580 # nodes, and that's enough warning
2582 #TODO(dynmem): also consider ballooning out other instances
2583 for prinode, instances in n_img.sbp.items():
2585 for instance in instances:
2586 bep = cluster_info.FillBE(instance_cfg[instance])
2587 if bep[constants.BE_AUTO_BALANCE]:
2588 needed_mem += bep[constants.BE_MINMEM]
2589 test = n_img.mfree < needed_mem
2590 self._ErrorIf(test, constants.CV_ENODEN1, node,
2591 "not enough memory to accomodate instance failovers"
2592 " should node %s fail (%dMiB needed, %dMiB available)",
2593 prinode, needed_mem, n_img.mfree)
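# Worked example (editor's illustration, numbers hypothetical): if node B is
# secondary for instances inst1 (BE_MINMEM=1024) and inst2 (BE_MINMEM=2048),
# both auto-balanced and both with primary node A, then needed_mem for the
# (B, A) pair is 3072; with mfree=2500 reported by B the CV_ENODEN1 error
# above fires, because B could not absorb a failover from A.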
2596 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2597 (files_all, files_opt, files_mc, files_vm)):
2598 """Verifies file checksums collected from all nodes.
2600 @param errorif: Callback for reporting errors
2601 @param nodeinfo: List of L{objects.Node} objects
2602 @param master_node: Name of master node
2603 @param all_nvinfo: RPC results
2606 # Define functions determining which nodes to consider for a file
2609 (files_mc, lambda node: (node.master_candidate or
2610 node.name == master_node)),
2611 (files_vm, lambda node: node.vm_capable),
2614 # Build mapping from filename to list of nodes which should have the file
2616 for (files, fn) in files2nodefn:
2618 filenodes = nodeinfo
2620 filenodes = filter(fn, nodeinfo)
2621 nodefiles.update((filename,
2622 frozenset(map(operator.attrgetter("name"), filenodes)))
2623 for filename in files)
2625 assert set(nodefiles) == (files_all | files_mc | files_vm)
2627 fileinfo = dict((filename, {}) for filename in nodefiles)
2628 ignore_nodes = set()
2630 for node in nodeinfo:
2632 ignore_nodes.add(node.name)
2635 nresult = all_nvinfo[node.name]
2637 if nresult.fail_msg or not nresult.payload:
2640 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2642 test = not (node_files and isinstance(node_files, dict))
2643 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2644 "Node did not return file checksum data")
2646 ignore_nodes.add(node.name)
2649 # Build per-checksum mapping from filename to nodes having it
2650 for (filename, checksum) in node_files.items():
2651 assert filename in nodefiles
2652 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2654 for (filename, checksums) in fileinfo.items():
2655 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2657 # Nodes having the file
2658 with_file = frozenset(node_name
2659 for nodes in fileinfo[filename].values()
2660 for node_name in nodes) - ignore_nodes
2662 expected_nodes = nodefiles[filename] - ignore_nodes
2664 # Nodes missing file
2665 missing_file = expected_nodes - with_file
2667 if filename in files_opt:
2669 errorif(missing_file and missing_file != expected_nodes,
2670 constants.CV_ECLUSTERFILECHECK, None,
2671 "File %s is optional, but it must exist on all or no"
2672 " nodes (not found on %s)",
2673 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2675 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2676 "File %s is missing from node(s) %s", filename,
2677 utils.CommaJoin(utils.NiceSort(missing_file)))
2679 # Warn if a node has a file it shouldn't
2680 unexpected = with_file - expected_nodes
2682 constants.CV_ECLUSTERFILECHECK, None,
2683 "File %s should not exist on node(s) %s",
2684 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2686 # See if there are multiple versions of the file
2687 test = len(checksums) > 1
2689 variants = ["variant %s on %s" %
2690 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2691 for (idx, (checksum, nodes)) in
2692 enumerate(sorted(checksums.items()))]
2696 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2697 "File %s found with %s different checksums (%s)",
2698 filename, len(checksums), "; ".join(variants))
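# Editor's sketch of the bookkeeping used above (file name and checksums
# hypothetical):
#
#   fileinfo = {
#     "/path/to/config.data": {
#       "0123abcd...": set(["node1", "node2"]),
#       "89efcdab...": set(["node3"]),
#     },
#   }
#
# i.e. one dictionary per tracked file mapping each observed checksum to the
# set of nodes reporting it; more than one checksum key means the file has
# diverged between nodes.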
2700 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2702 """Verifies and the node DRBD status.
2704 @type ninfo: L{objects.Node}
2705 @param ninfo: the node to check
2706 @param nresult: the remote results for the node
2707 @param instanceinfo: the dict of instances
2708 @param drbd_helper: the configured DRBD usermode helper
2709 @param drbd_map: the DRBD map as returned by
2710 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2714 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2717 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2718 test = (helper_result is None)
2719 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2720 "no drbd usermode helper returned")
2722 status, payload = helper_result
2724 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2725 "drbd usermode helper check unsuccessful: %s", payload)
2726 test = status and (payload != drbd_helper)
2727 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2728 "wrong drbd usermode helper: %s", payload)
2730 # compute the DRBD minors
2732 for minor, instance in drbd_map[node].items():
2733 test = instance not in instanceinfo
2734 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2735 "ghost instance '%s' in temporary DRBD map", instance)
2736 # ghost instance should not be running, but otherwise we
2737 # don't give double warnings (both ghost instance and
2738 # unallocated minor in use)
2740 node_drbd[minor] = (instance, False)
2742 instance = instanceinfo[instance]
2743 node_drbd[minor] = (instance.name,
2744 instance.admin_state == constants.ADMINST_UP)
2746 # and now check them
2747 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2748 test = not isinstance(used_minors, (tuple, list))
2749 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2750 "cannot parse drbd status file: %s", str(used_minors))
2752 # we cannot check drbd status
2755 for minor, (iname, must_exist) in node_drbd.items():
2756 test = minor not in used_minors and must_exist
2757 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2758 "drbd minor %d of instance %s is not active", minor, iname)
2759 for minor in used_minors:
2760 test = minor not in node_drbd
2761 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2762 "unallocated drbd minor %d is in use", minor)
2764 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2765 """Builds the node OS structures.
2767 @type ninfo: L{objects.Node}
2768 @param ninfo: the node to check
2769 @param nresult: the remote results for the node
2770 @param nimg: the node image object
2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2776 remote_os = nresult.get(constants.NV_OSLIST, None)
2777 test = (not isinstance(remote_os, list) or
2778 not compat.all(isinstance(v, list) and len(v) == 7
2779 for v in remote_os))
2781 _ErrorIf(test, constants.CV_ENODEOS, node,
2782 "node hasn't returned valid OS data")
2791 for (name, os_path, status, diagnose,
2792 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2794 if name not in os_dict:
2797 # parameters is a list of lists instead of list of tuples due to
2798 # JSON lacking a real tuple type, fix it:
2799 parameters = [tuple(v) for v in parameters]
2800 os_dict[name].append((os_path, status, diagnose,
2801 set(variants), set(parameters), set(api_ver)))
2803 nimg.oslist = os_dict
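# Editor's note (illustrative): after this call nimg.oslist maps every OS
# name the node reported to a list of
#   (path, status, diagnose_msg, set(variants), set(parameters), set(api_versions))
# tuples, one per location the OS was found in; _VerifyNodeOS below trusts
# only the first entry and flags any duplicates.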
2805 def _VerifyNodeOS(self, ninfo, nimg, base):
2806 """Verifies the node OS list.
2808 @type ninfo: L{objects.Node}
2809 @param ninfo: the node to check
2810 @param nimg: the node image object
2811 @param base: the 'template' node we match against (e.g. from the master)
2815 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2817 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2819 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2820 for os_name, os_data in nimg.oslist.items():
2821 assert os_data, "Empty OS status for OS %s?!" % os_name
2822 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2823 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2824 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2825 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2826 "OS '%s' has multiple entries (first one shadows the rest): %s",
2827 os_name, utils.CommaJoin([v[0] for v in os_data]))
2828 # comparisons with the 'base' image
2829 test = os_name not in base.oslist
2830 _ErrorIf(test, constants.CV_ENODEOS, node,
2831 "Extra OS %s not present on reference node (%s)",
2835 assert base.oslist[os_name], "Base node has empty OS status?"
2836 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2838 # base OS is invalid, skipping
2840 for kind, a, b in [("API version", f_api, b_api),
2841 ("variants list", f_var, b_var),
2842 ("parameters", beautify_params(f_param),
2843 beautify_params(b_param))]:
2844 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2845 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2846 kind, os_name, base.name,
2847 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2849 # check any missing OSes
2850 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2851 _ErrorIf(missing, constants.CV_ENODEOS, node,
2852 "OSes present on reference node %s but missing on this node: %s",
2853 base.name, utils.CommaJoin(missing))
2855 def _VerifyOob(self, ninfo, nresult):
2856 """Verifies out of band functionality of a node.
2858 @type ninfo: L{objects.Node}
2859 @param ninfo: the node to check
2860 @param nresult: the remote results for the node
2864 # We just have to verify the paths on master and/or master candidates
2865 # as the oob helper is invoked on the master
2866 if ((ninfo.master_candidate or ninfo.master_capable) and
2867 constants.NV_OOB_PATHS in nresult):
2868 for path_result in nresult[constants.NV_OOB_PATHS]:
2869 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2871 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2872 """Verifies and updates the node volume data.
2874 This function will update a L{NodeImage}'s internal structures
2875 with data from the remote call.
2877 @type ninfo: L{objects.Node}
2878 @param ninfo: the node to check
2879 @param nresult: the remote results for the node
2880 @param nimg: the node image object
2881 @param vg_name: the configured VG name
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 nimg.lvm_fail = True
2888 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2891 elif isinstance(lvdata, basestring):
2892 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2893 utils.SafeEncode(lvdata))
2894 elif not isinstance(lvdata, dict):
2895 _ErrorIf(True, constants.CV_ENODELVM, node,
2896 "rpc call to node failed (lvlist)")
2898 nimg.volumes = lvdata
2899 nimg.lvm_fail = False
2901 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2902 """Verifies and updates the node instance list.
2904 If the listing was successful, then updates this node's instance
2905 list. Otherwise, it marks the RPC call as failed for the instance
2908 @type ninfo: L{objects.Node}
2909 @param ninfo: the node to check
2910 @param nresult: the remote results for the node
2911 @param nimg: the node image object
2914 idata = nresult.get(constants.NV_INSTANCELIST, None)
2915 test = not isinstance(idata, list)
2916 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2917 "rpc call to node failed (instancelist): %s",
2918 utils.SafeEncode(str(idata)))
2920 nimg.hyp_fail = True
2922 nimg.instances = idata
2924 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2925 """Verifies and computes a node information map
2927 @type ninfo: L{objects.Node}
2928 @param ninfo: the node to check
2929 @param nresult: the remote results for the node
2930 @param nimg: the node image object
2931 @param vg_name: the configured VG name
2935 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2937 # try to read free memory (from the hypervisor)
2938 hv_info = nresult.get(constants.NV_HVINFO, None)
2939 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2940 _ErrorIf(test, constants.CV_ENODEHV, node,
2941 "rpc call to node failed (hvinfo)")
2944 nimg.mfree = int(hv_info["memory_free"])
2945 except (ValueError, TypeError):
2946 _ErrorIf(True, constants.CV_ENODERPC, node,
2947 "node returned invalid nodeinfo, check hypervisor")
2949 # FIXME: devise a free space model for file based instances as well
2950 if vg_name is not None:
2951 test = (constants.NV_VGLIST not in nresult or
2952 vg_name not in nresult[constants.NV_VGLIST])
2953 _ErrorIf(test, constants.CV_ENODELVM, node,
2954 "node didn't return data for the volume group '%s'"
2955 " - it is either missing or broken", vg_name)
2958 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2959 except (ValueError, TypeError):
2960 _ErrorIf(True, constants.CV_ENODERPC, node,
2961 "node returned invalid LVM info, check LVM status")
2963 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2964 """Gets per-disk status information for all instances.
2966 @type nodelist: list of strings
2967 @param nodelist: Node names
2968 @type node_image: dict of (name, L{objects.Node})
2969 @param node_image: Node objects
2970 @type instanceinfo: dict of (name, L{objects.Instance})
2971 @param instanceinfo: Instance objects
2972 @rtype: {instance: {node: [(success, payload)]}}
2973 @return: a dictionary of per-instance dictionaries with nodes as
2974 keys and disk information as values; the disk information is a
2975 list of tuples (success, payload)
2978 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 node_disks_devonly = {}
2982 diskless_instances = set()
2983 diskless = constants.DT_DISKLESS
2985 for nname in nodelist:
2986 node_instances = list(itertools.chain(node_image[nname].pinst,
2987 node_image[nname].sinst))
2988 diskless_instances.update(inst for inst in node_instances
2989 if instanceinfo[inst].disk_template == diskless)
2990 disks = [(inst, disk)
2991 for inst in node_instances
2992 for disk in instanceinfo[inst].disks]
2995 # No need to collect data
2998 node_disks[nname] = disks
3000 # _AnnotateDiskParams makes already copies of the disks
3002 for (inst, dev) in disks:
3003 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3004 self.cfg.SetDiskID(anno_disk, nname)
3005 devonly.append(anno_disk)
3007 node_disks_devonly[nname] = devonly
3009 assert len(node_disks) == len(node_disks_devonly)
3011 # Collect data from all nodes with disks
3012 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3015 assert len(result) == len(node_disks)
3019 for (nname, nres) in result.items():
3020 disks = node_disks[nname]
3023 # No data from this node
3024 data = len(disks) * [(False, "node offline")]
3027 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3028 "while getting disk information: %s", msg)
3030 # No data from this node
3031 data = len(disks) * [(False, msg)]
3034 for idx, i in enumerate(nres.payload):
3035 if isinstance(i, (tuple, list)) and len(i) == 2:
3038 logging.warning("Invalid result from node %s, entry %d: %s",
3040 data.append((False, "Invalid result from the remote node"))
3042 for ((inst, _), status) in zip(disks, data):
3043 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3045 # Add empty entries for diskless instances.
3046 for inst in diskless_instances:
3047 assert inst not in instdisk
3050 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3051 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3052 compat.all(isinstance(s, (tuple, list)) and
3053 len(s) == 2 for s in statuses)
3054 for inst, nnames in instdisk.items()
3055 for nname, statuses in nnames.items())
3056 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
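# Editor's sketch of the returned mapping (names hypothetical):
#
#   instdisk = {
#     "inst1": {"node1": [(True, <status>), (True, <status>)],
#               "node2": [(False, "node offline"), (False, "node offline")]},
#     "diskless-inst": {},
#   }
#
# one (success, payload) pair per disk on every node the disks live on, with
# diskless instances represented by an empty inner dictionary.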
3061 def _SshNodeSelector(group_uuid, all_nodes):
3062 """Create endless iterators for all potential SSH check hosts.
3065 nodes = [node for node in all_nodes
3066 if (node.group != group_uuid and
3068 keyfunc = operator.attrgetter("group")
3070 return map(itertools.cycle,
3071 [sorted(map(operator.attrgetter("name"), names))
3072 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3076 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3077 """Choose which nodes should talk to which other nodes.
3079 We will make nodes contact all nodes in their group, and one node from
3082 @warning: This algorithm has a known issue if one node group is much
3083 smaller than others (e.g. just one node). In such a case all other
3084 nodes will talk to the single node.
3087 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3088 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3090 return (online_nodes,
3091 dict((name, sorted([i.next() for i in sel]))
3092 for name in online_nodes))
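# Editor's example (node names hypothetical): for a two-group cluster this
# could return
#
#   (["node1", "node2"],
#    {"node1": ["other-group-nodeA"], "node2": ["other-group-nodeB"]})
#
# i.e. the group's own online nodes, which every node contacts, plus one
# extra target per node drawn from each foreign group by the round-robin
# cycles of _SshNodeSelector.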
3094 def BuildHooksEnv(self):
3097 Cluster-Verify hooks just ran in the post phase and their failure makes
3098 the output be logged in the verify output and the verification to fail.
3102 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3105 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3106 for node in self.my_node_info.values())
3110 def BuildHooksNodes(self):
3111 """Build hooks nodes.
3114 return ([], self.my_node_names)
3116 def Exec(self, feedback_fn):
3117 """Verify integrity of the node group, performing various test on nodes.
3120 # This method has too many local variables. pylint: disable=R0914
3121 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3123 if not self.my_node_names:
3125 feedback_fn("* Empty node group, skipping verification")
3129 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3130 verbose = self.op.verbose
3131 self._feedback_fn = feedback_fn
3133 vg_name = self.cfg.GetVGName()
3134 drbd_helper = self.cfg.GetDRBDHelper()
3135 cluster = self.cfg.GetClusterInfo()
3136 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3137 hypervisors = cluster.enabled_hypervisors
3138 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3140 i_non_redundant = [] # Non redundant instances
3141 i_non_a_balanced = [] # Non auto-balanced instances
3142 i_offline = 0 # Count of offline instances
3143 n_offline = 0 # Count of offline nodes
3144 n_drained = 0 # Count of nodes being drained
3145 node_vol_should = {}
3147 # FIXME: verify OS list
3150 filemap = _ComputeAncillaryFiles(cluster, False)
3152 # do local checksums
3153 master_node = self.master_node = self.cfg.GetMasterNode()
3154 master_ip = self.cfg.GetMasterIP()
3156 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3159 if self.cfg.GetUseExternalMipScript():
3160 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3162 node_verify_param = {
3163 constants.NV_FILELIST:
3164 utils.UniqueSequence(filename
3165 for files in filemap
3166 for filename in files),
3167 constants.NV_NODELIST:
3168 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3169 self.all_node_info.values()),
3170 constants.NV_HYPERVISOR: hypervisors,
3171 constants.NV_HVPARAMS:
3172 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3173 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3174 for node in node_data_list
3175 if not node.offline],
3176 constants.NV_INSTANCELIST: hypervisors,
3177 constants.NV_VERSION: None,
3178 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3179 constants.NV_NODESETUP: None,
3180 constants.NV_TIME: None,
3181 constants.NV_MASTERIP: (master_node, master_ip),
3182 constants.NV_OSLIST: None,
3183 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3184 constants.NV_USERSCRIPTS: user_scripts,
3187 if vg_name is not None:
3188 node_verify_param[constants.NV_VGLIST] = None
3189 node_verify_param[constants.NV_LVLIST] = vg_name
3190 node_verify_param[constants.NV_PVLIST] = [vg_name]
3191 node_verify_param[constants.NV_DRBDLIST] = None
3194 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3197 # FIXME: this needs to be changed per node-group, not cluster-wide
3199 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3200 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3201 bridges.add(default_nicpp[constants.NIC_LINK])
3202 for instance in self.my_inst_info.values():
3203 for nic in instance.nics:
3204 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3205 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3206 bridges.add(full_nic[constants.NIC_LINK])
3209 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3211 # Build our expected cluster state
3212 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3214 vm_capable=node.vm_capable))
3215 for node in node_data_list)
3219 for node in self.all_node_info.values():
3220 path = _SupportsOob(self.cfg, node)
3221 if path and path not in oob_paths:
3222 oob_paths.append(path)
3225 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3227 for instance in self.my_inst_names:
3228 inst_config = self.my_inst_info[instance]
3229 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3232 for nname in inst_config.all_nodes:
3233 if nname not in node_image:
3234 gnode = self.NodeImage(name=nname)
3235 gnode.ghost = (nname not in self.all_node_info)
3236 node_image[nname] = gnode
3238 inst_config.MapLVsByNode(node_vol_should)
3240 pnode = inst_config.primary_node
3241 node_image[pnode].pinst.append(instance)
3243 for snode in inst_config.secondary_nodes:
3244 nimg = node_image[snode]
3245 nimg.sinst.append(instance)
3246 if pnode not in nimg.sbp:
3247 nimg.sbp[pnode] = []
3248 nimg.sbp[pnode].append(instance)
3250 # At this point, we have the in-memory data structures complete,
3251 # except for the runtime information, which we'll gather next
3253 # Due to the way our RPC system works, exact response times cannot be
3254 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3255 # time before and after executing the request, we can at least have a time
3257 nvinfo_starttime = time.time()
3258 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3260 self.cfg.GetClusterName())
3261 nvinfo_endtime = time.time()
3263 if self.extra_lv_nodes and vg_name is not None:
3265 self.rpc.call_node_verify(self.extra_lv_nodes,
3266 {constants.NV_LVLIST: vg_name},
3267 self.cfg.GetClusterName())
3269 extra_lv_nvinfo = {}
3271 all_drbd_map = self.cfg.ComputeDRBDMap()
3273 feedback_fn("* Gathering disk information (%s nodes)" %
3274 len(self.my_node_names))
3275 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3278 feedback_fn("* Verifying configuration file consistency")
3280 # If not all nodes are being checked, we need to make sure the master node
3281 # and a non-checked vm_capable node are in the list.
3282 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3284 vf_nvinfo = all_nvinfo.copy()
3285 vf_node_info = list(self.my_node_info.values())
3286 additional_nodes = []
3287 if master_node not in self.my_node_info:
3288 additional_nodes.append(master_node)
3289 vf_node_info.append(self.all_node_info[master_node])
3290 # Add the first vm_capable node we find which is not included,
3291 # excluding the master node (which we already have)
3292 for node in absent_nodes:
3293 nodeinfo = self.all_node_info[node]
3294 if (nodeinfo.vm_capable and not nodeinfo.offline and
3295 node != master_node):
3296 additional_nodes.append(node)
3297 vf_node_info.append(self.all_node_info[node])
3299 key = constants.NV_FILELIST
3300 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3301 {key: node_verify_param[key]},
3302 self.cfg.GetClusterName()))
3304 vf_nvinfo = all_nvinfo
3305 vf_node_info = self.my_node_info.values()
3307 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3309 feedback_fn("* Verifying node status")
3313 for node_i in node_data_list:
3315 nimg = node_image[node]
3319 feedback_fn("* Skipping offline node %s" % (node,))
3323 if node == master_node:
3325 elif node_i.master_candidate:
3326 ntype = "master candidate"
3327 elif node_i.drained:
3333 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3335 msg = all_nvinfo[node].fail_msg
3336 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3339 nimg.rpc_fail = True
3342 nresult = all_nvinfo[node].payload
3344 nimg.call_ok = self._VerifyNode(node_i, nresult)
3345 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3346 self._VerifyNodeNetwork(node_i, nresult)
3347 self._VerifyNodeUserScripts(node_i, nresult)
3348 self._VerifyOob(node_i, nresult)
3351 self._VerifyNodeLVM(node_i, nresult, vg_name)
3352 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3355 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3356 self._UpdateNodeInstances(node_i, nresult, nimg)
3357 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3358 self._UpdateNodeOS(node_i, nresult, nimg)
3360 if not nimg.os_fail:
3361 if refos_img is None:
3363 self._VerifyNodeOS(node_i, nimg, refos_img)
3364 self._VerifyNodeBridges(node_i, nresult, bridges)
3366 # Check whether all running instances are primary for the node. (This
3367 # can no longer be done from _VerifyInstance below, since some of the
3368 # wrong instances could be from other node groups.)
3369 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3371 for inst in non_primary_inst:
3372 test = inst in self.all_inst_info
3373 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3374 "instance should not run on node %s", node_i.name)
3375 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3376 "node is running unknown instance %s", inst)
3378 for node, result in extra_lv_nvinfo.items():
3379 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3380 node_image[node], vg_name)
3382 feedback_fn("* Verifying instance status")
3383 for instance in self.my_inst_names:
3385 feedback_fn("* Verifying instance %s" % instance)
3386 inst_config = self.my_inst_info[instance]
3387 self._VerifyInstance(instance, inst_config, node_image,
3389 inst_nodes_offline = []
3391 pnode = inst_config.primary_node
3392 pnode_img = node_image[pnode]
3393 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3394 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3395 " primary node failed", instance)
3397 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3399 constants.CV_EINSTANCEBADNODE, instance,
3400 "instance is marked as running and lives on offline node %s",
3401 inst_config.primary_node)
3403 # If the instance is non-redundant we cannot survive losing its primary
3404 # node, so we are not N+1 compliant. On the other hand we have no disk
3405 # templates with more than one secondary so that situation is not well
3407 # FIXME: does not support file-backed instances
3408 if not inst_config.secondary_nodes:
3409 i_non_redundant.append(instance)
3411 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3412 constants.CV_EINSTANCELAYOUT,
3413 instance, "instance has multiple secondary nodes: %s",
3414 utils.CommaJoin(inst_config.secondary_nodes),
3415 code=self.ETYPE_WARNING)
3417 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3418 pnode = inst_config.primary_node
3419 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3420 instance_groups = {}
3422 for node in instance_nodes:
3423 instance_groups.setdefault(self.all_node_info[node].group,
3427 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3428 # Sort so that we always list the primary node first.
3429 for group, nodes in sorted(instance_groups.items(),
3430 key=lambda (_, nodes): pnode in nodes,
3433 self._ErrorIf(len(instance_groups) > 1,
3434 constants.CV_EINSTANCESPLITGROUPS,
3435 instance, "instance has primary and secondary nodes in"
3436 " different groups: %s", utils.CommaJoin(pretty_list),
3437 code=self.ETYPE_WARNING)
3439 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3440 i_non_a_balanced.append(instance)
3442 for snode in inst_config.secondary_nodes:
3443 s_img = node_image[snode]
3444 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3445 snode, "instance %s, connection to secondary node failed",
3449 inst_nodes_offline.append(snode)
3451 # warn that the instance lives on offline nodes
3452 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3453 "instance has offline secondary node(s) %s",
3454 utils.CommaJoin(inst_nodes_offline))
3455 # ... or ghost/non-vm_capable nodes
3456 for node in inst_config.all_nodes:
3457 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3458 instance, "instance lives on ghost node %s", node)
3459 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3460 instance, "instance lives on non-vm_capable node %s", node)
3462 feedback_fn("* Verifying orphan volumes")
3463 reserved = utils.FieldSet(*cluster.reserved_lvs)
3465 # We will get spurious "unknown volume" warnings if any node of this group
3466 # is secondary for an instance whose primary is in another group. To avoid
3467 # them, we find these instances and add their volumes to node_vol_should.
3468 for inst in self.all_inst_info.values():
3469 for secondary in inst.secondary_nodes:
3470 if (secondary in self.my_node_info
3471 and inst.name not in self.my_inst_info):
3472 inst.MapLVsByNode(node_vol_should)
3475 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3477 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3478 feedback_fn("* Verifying N+1 Memory redundancy")
3479 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3481 feedback_fn("* Other Notes")
3483 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3484 % len(i_non_redundant))
3486 if i_non_a_balanced:
3487 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3488 % len(i_non_a_balanced))
3491 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3494 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3497 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3501 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3502 """Analyze the post-hooks' result
3504 This method analyses the hook result, handles it, and sends some
3505 nicely-formatted feedback back to the user.
3507 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3508 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3509 @param hooks_results: the results of the multi-node hooks rpc call
3510 @param feedback_fn: function used to send feedback back to the caller
3511 @param lu_result: previous Exec result
3512 @return: the new Exec result, based on the previous result
3516 # We only really run POST phase hooks, only for non-empty groups,
3517 # and are only interested in their results
3518 if not self.my_node_names:
3521 elif phase == constants.HOOKS_PHASE_POST:
3522 # Used to change hooks' output to proper indentation
3523 feedback_fn("* Hooks Results")
3524 assert hooks_results, "invalid result from hooks"
3526 for node_name in hooks_results:
3527 res = hooks_results[node_name]
3529 test = msg and not res.offline
3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531 "Communication failure in hooks execution: %s", msg)
3532 if res.offline or msg:
3533 # No need to investigate payload if node is offline or gave
3536 for script, hkr, output in res.payload:
3537 test = hkr == constants.HKR_FAIL
3538 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3539 "Script %s failed, output:", script)
3541 output = self._HOOKS_INDENT_RE.sub(" ", output)
3542 feedback_fn("%s" % output)
3548 class LUClusterVerifyDisks(NoHooksLU):
3549 """Verifies the cluster disks status.
3554 def ExpandNames(self):
3555 self.share_locks = _ShareAll()
3556 self.needed_locks = {
3557 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3560 def Exec(self, feedback_fn):
3561 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3563 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3564 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3565 for group in group_names])
3568 class LUGroupVerifyDisks(NoHooksLU):
3569 """Verifies the status of all disks in a node group.
3574 def ExpandNames(self):
3575 # Raises errors.OpPrereqError on its own if group can't be found
3576 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3578 self.share_locks = _ShareAll()
3579 self.needed_locks = {
3580 locking.LEVEL_INSTANCE: [],
3581 locking.LEVEL_NODEGROUP: [],
3582 locking.LEVEL_NODE: [],
3585 def DeclareLocks(self, level):
3586 if level == locking.LEVEL_INSTANCE:
3587 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3589 # Lock instances optimistically, needs verification once node and group
3590 # locks have been acquired
3591 self.needed_locks[locking.LEVEL_INSTANCE] = \
3592 self.cfg.GetNodeGroupInstances(self.group_uuid)
3594 elif level == locking.LEVEL_NODEGROUP:
3595 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3597 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3598 set([self.group_uuid] +
3599 # Lock all groups used by instances optimistically; this requires
3600 # going via the node before it's locked, requiring verification
3603 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3604 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3606 elif level == locking.LEVEL_NODE:
3607 # This will only lock the nodes in the group to be verified which contain
3609 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3610 self._LockInstancesNodes()
3612 # Lock all nodes in group to be verified
3613 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3614 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3615 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3617 def CheckPrereq(self):
3618 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3619 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3620 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3622 assert self.group_uuid in owned_groups
3624 # Check if locked instances are still correct
3625 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3627 # Get instance information
3628 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3630 # Check if node groups for locked instances are still correct
3631 _CheckInstancesNodeGroups(self.cfg, self.instances,
3632 owned_groups, owned_nodes, self.group_uuid)
3634 def Exec(self, feedback_fn):
3635 """Verify integrity of cluster disks.
3637 @rtype: tuple of three items
3638 @return: a tuple of (dict of node-to-node_error, list of instances
3639 which need activate-disks, dict of instance: (node, volume) for
3644 res_instances = set()
3647 nv_dict = _MapInstanceDisksToNodes([inst
3648 for inst in self.instances.values()
3649 if inst.admin_state == constants.ADMINST_UP])
3652 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3653 set(self.cfg.GetVmCapableNodeList()))
3655 node_lvs = self.rpc.call_lv_list(nodes, [])
3657 for (node, node_res) in node_lvs.items():
3658 if node_res.offline:
3661 msg = node_res.fail_msg
3663 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3664 res_nodes[node] = msg
3667 for lv_name, (_, _, lv_online) in node_res.payload.items():
3668 inst = nv_dict.pop((node, lv_name), None)
3669 if not (lv_online or inst is None):
3670 res_instances.add(inst)
3672 # any leftover items in nv_dict are missing LVs, let's arrange the data
3674 for key, inst in nv_dict.iteritems():
3675 res_missing.setdefault(inst, []).append(list(key))
3677 return (res_nodes, list(res_instances), res_missing)
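# Editor's sketch of the result shape (names hypothetical):
#
#   ({"node3": "Error enumerating LVs ..."},
#    ["inst-with-offline-lv"],
#    {"inst-missing-lv": [["node1", "xenvg/lv-name"]]})
#
# matching the (per-node errors, instances needing activate-disks,
# instance-to-missing-volumes) tuple documented in the docstring above.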
3680 class LUClusterRepairDiskSizes(NoHooksLU):
3681 """Verifies the cluster disks sizes.
3686 def ExpandNames(self):
3687 if self.op.instances:
3688 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3689 self.needed_locks = {
3690 locking.LEVEL_NODE_RES: [],
3691 locking.LEVEL_INSTANCE: self.wanted_names,
3693 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3695 self.wanted_names = None
3696 self.needed_locks = {
3697 locking.LEVEL_NODE_RES: locking.ALL_SET,
3698 locking.LEVEL_INSTANCE: locking.ALL_SET,
3700 self.share_locks = {
3701 locking.LEVEL_NODE_RES: 1,
3702 locking.LEVEL_INSTANCE: 0,
3705 def DeclareLocks(self, level):
3706 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3707 self._LockInstancesNodes(primary_only=True, level=level)
3709 def CheckPrereq(self):
3710 """Check prerequisites.
3712 This only checks the optional instance list against the existing names.
3715 if self.wanted_names is None:
3716 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3718 self.wanted_instances = \
3719 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3721 def _EnsureChildSizes(self, disk):
3722 """Ensure children of the disk have the needed disk size.
3724 This is valid mainly for DRBD8 and fixes an issue where the
3725 children have a smaller disk size than the parent.
3727 @param disk: an L{ganeti.objects.Disk} object
3730 if disk.dev_type == constants.LD_DRBD8:
3731 assert disk.children, "Empty children for DRBD8?"
3732 fchild = disk.children[0]
3733 mismatch = fchild.size < disk.size
3735 self.LogInfo("Child disk has size %d, parent %d, fixing",
3736 fchild.size, disk.size)
3737 fchild.size = disk.size
3739 # and we recurse on this child only, not on the metadev
3740 return self._EnsureChildSizes(fchild) or mismatch
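# Minimal standalone sketch of the resize rule implemented by
# _EnsureChildSizes above, using a toy disk class instead of
# L{ganeti.objects.Disk}.  Purely illustrative; none of these names exist
# upstream.
class _ToyDisk(object):
  """Tiny stand-in for objects.Disk, used only by the sketch below."""
  def __init__(self, size, children=None, is_drbd=False):
    self.size = size
    self.children = children or []
    self.is_drbd = is_drbd


def _EnsureChildSizesSketch(disk):
  """Grows the DRBD data child to the parent's size, recursively.

  Returns True if any size had to be corrected, mirroring the logic above
  (the metadata child, children[1], is deliberately left alone).
  """
  if not disk.is_drbd:
    return False
  data_child = disk.children[0]
  mismatch = data_child.size < disk.size
  if mismatch:
    data_child.size = disk.size
  return _EnsureChildSizesSketch(data_child) or mismatch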
3744 def Exec(self, feedback_fn):
3745 """Verify the size of cluster disks.
3748 # TODO: check child disks too
3749 # TODO: check differences in size between primary/secondary nodes
3751 for instance in self.wanted_instances:
3752 pnode = instance.primary_node
3753 if pnode not in per_node_disks:
3754 per_node_disks[pnode] = []
3755 for idx, disk in enumerate(instance.disks):
3756 per_node_disks[pnode].append((instance, idx, disk))
3758 assert not (frozenset(per_node_disks.keys()) -
3759 self.owned_locks(locking.LEVEL_NODE_RES)), \
3760 "Not owning correct locks"
3761 assert not self.owned_locks(locking.LEVEL_NODE)
3764 for node, dskl in per_node_disks.items():
3765 newl = [v[2].Copy() for v in dskl]
3767 self.cfg.SetDiskID(dsk, node)
3768 result = self.rpc.call_blockdev_getsize(node, newl)
3770 self.LogWarning("Failure in blockdev_getsize call to node"
3771 " %s, ignoring", node)
3773 if len(result.payload) != len(dskl):
3774 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3775 " result.payload=%s", node, len(dskl), result.payload)
3776 self.LogWarning("Invalid result from node %s, ignoring node results",
3779 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3781 self.LogWarning("Disk %d of instance %s did not return size"
3782 " information, ignoring", idx, instance.name)
3784 if not isinstance(size, (int, long)):
3785 self.LogWarning("Disk %d of instance %s did not return valid"
3786 " size information, ignoring", idx, instance.name)
3789 if size != disk.size:
3790 self.LogInfo("Disk %d of instance %s has mismatched size,"
3791 " correcting: recorded %d, actual %d", idx,
3792 instance.name, disk.size, size)
3794 self.cfg.Update(instance, feedback_fn)
3795 changed.append((instance.name, idx, size))
3796 if self._EnsureChildSizes(disk):
3797 self.cfg.Update(instance, feedback_fn)
3798 changed.append((instance.name, idx, disk.size))
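# Illustrative only: each entry appended to "changed" above is an
# (instance_name, disk_index, new_size) tuple.  A hypothetical caller could
# group the corrections per instance like this; the helper below is not part
# of the original code.
def _GroupRepairedSizesSketch(changed):
  """Returns {instance_name: [(disk_index, new_size), ...]} (sketch)."""
  per_instance = {}
  for (iname, idx, new_size) in changed:
    per_instance.setdefault(iname, []).append((idx, new_size))
  return per_instance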
3802 class LUClusterRename(LogicalUnit):
3803 """Rename the cluster.
3806 HPATH = "cluster-rename"
3807 HTYPE = constants.HTYPE_CLUSTER
3809 def BuildHooksEnv(self):
3814 "OP_TARGET": self.cfg.GetClusterName(),
3815 "NEW_NAME": self.op.name,
3818 def BuildHooksNodes(self):
3819 """Build hooks nodes.
3822 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3824 def CheckPrereq(self):
3825 """Verify that the passed name is a valid one.
3828 hostname = netutils.GetHostname(name=self.op.name,
3829 family=self.cfg.GetPrimaryIPFamily())
3831 new_name = hostname.name
3832 self.ip = new_ip = hostname.ip
3833 old_name = self.cfg.GetClusterName()
3834 old_ip = self.cfg.GetMasterIP()
3835 if new_name == old_name and new_ip == old_ip:
3836 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3837 " cluster has changed",
3839 if new_ip != old_ip:
3840 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3841 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3842 " reachable on the network" %
3843 new_ip, errors.ECODE_NOTUNIQUE)
3845 self.op.name = new_name
3847 def Exec(self, feedback_fn):
3848 """Rename the cluster.
3851 clustername = self.op.name
3854 # shutdown the master IP
3855 master_params = self.cfg.GetMasterNetworkParameters()
3856 ems = self.cfg.GetUseExternalMipScript()
3857 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3859 result.Raise("Could not disable the master role")
3862 cluster = self.cfg.GetClusterInfo()
3863 cluster.cluster_name = clustername
3864 cluster.master_ip = new_ip
3865 self.cfg.Update(cluster, feedback_fn)
3867 # update the known hosts file
3868 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3869 node_list = self.cfg.GetOnlineNodeList()
3871 node_list.remove(master_params.name)
3874 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3876 master_params.ip = new_ip
3877 result = self.rpc.call_node_activate_master_ip(master_params.name,
3879 msg = result.fail_msg
3881 self.LogWarning("Could not re-enable the master role on"
3882 " the master, please restart manually: %s", msg)
3887 def _ValidateNetmask(cfg, netmask):
3888 """Checks if a netmask is valid.
3890 @type cfg: L{config.ConfigWriter}
3891 @param cfg: The cluster configuration
3893 @param netmask: the netmask to be verified
3894 @raise errors.OpPrereqError: if the validation fails
3897 ip_family = cfg.GetPrimaryIPFamily()
3899 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3900 except errors.ProgrammerError:
3901 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3903 if not ipcls.ValidateNetmask(netmask):
3904 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3908 class LUClusterSetParams(LogicalUnit):
3909 """Change the parameters of the cluster.
3912 HPATH = "cluster-modify"
3913 HTYPE = constants.HTYPE_CLUSTER
3916 def CheckArguments(self):
3920 if self.op.uid_pool:
3921 uidpool.CheckUidPool(self.op.uid_pool)
3923 if self.op.add_uids:
3924 uidpool.CheckUidPool(self.op.add_uids)
3926 if self.op.remove_uids:
3927 uidpool.CheckUidPool(self.op.remove_uids)
3929 if self.op.master_netmask is not None:
3930 _ValidateNetmask(self.cfg, self.op.master_netmask)
3932 if self.op.diskparams:
3933 for dt_params in self.op.diskparams.values():
3934 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3936 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3937 except errors.OpPrereqError, err:
3938 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3941 def ExpandNames(self):
3942 # FIXME: in the future maybe other cluster params won't require checking on
3943 # all nodes to be modified.
3944 self.needed_locks = {
3945 locking.LEVEL_NODE: locking.ALL_SET,
3946 locking.LEVEL_INSTANCE: locking.ALL_SET,
3947 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3949 self.share_locks = {
3950 locking.LEVEL_NODE: 1,
3951 locking.LEVEL_INSTANCE: 1,
3952 locking.LEVEL_NODEGROUP: 1,
3955 def BuildHooksEnv(self):
3960 "OP_TARGET": self.cfg.GetClusterName(),
3961 "NEW_VG_NAME": self.op.vg_name,
3964 def BuildHooksNodes(self):
3965 """Build hooks nodes.
3968 mn = self.cfg.GetMasterNode()
3971 def CheckPrereq(self):
3972 """Check prerequisites.
3974 This checks whether the given parameters don't conflict and
3975 whether the given volume group is valid.
3978 if self.op.vg_name is not None and not self.op.vg_name:
3979 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3980 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3981 " instances exist", errors.ECODE_INVAL)
3983 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3984 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3985 raise errors.OpPrereqError("Cannot disable drbd helper while"
3986 " drbd-based instances exist",
3989 node_list = self.owned_locks(locking.LEVEL_NODE)
3991 # if vg_name not None, checks given volume group on all nodes
3993 vglist = self.rpc.call_vg_list(node_list)
3994 for node in node_list:
3995 msg = vglist[node].fail_msg
3997 # ignoring down node
3998 self.LogWarning("Error while gathering data on node %s"
3999 " (ignoring node): %s", node, msg)
4001 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4003 constants.MIN_VG_SIZE)
4005 raise errors.OpPrereqError("Error on node '%s': %s" %
4006 (node, vgstatus), errors.ECODE_ENVIRON)
4008 if self.op.drbd_helper:
4009 # checks given drbd helper on all nodes
4010 helpers = self.rpc.call_drbd_helper(node_list)
4011 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4013 self.LogInfo("Not checking drbd helper on offline node %s", node)
4015 msg = helpers[node].fail_msg
4017 raise errors.OpPrereqError("Error checking drbd helper on node"
4018 " '%s': %s" % (node, msg),
4019 errors.ECODE_ENVIRON)
4020 node_helper = helpers[node].payload
4021 if node_helper != self.op.drbd_helper:
4022 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4023 (node, node_helper), errors.ECODE_ENVIRON)
4025 self.cluster = cluster = self.cfg.GetClusterInfo()
4026 # validate params changes
4027 if self.op.beparams:
4028 objects.UpgradeBeParams(self.op.beparams)
4029 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4030 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4032 if self.op.ndparams:
4033 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4034 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4036 # TODO: we need a more general way to handle resetting
4037 # cluster-level parameters to default values
4038 if self.new_ndparams["oob_program"] == "":
4039 self.new_ndparams["oob_program"] = \
4040 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4042 if self.op.hv_state:
4043 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4044 self.cluster.hv_state_static)
4045 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4046 for hv, values in new_hv_state.items())
4048 if self.op.disk_state:
4049 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4050 self.cluster.disk_state_static)
4051 self.new_disk_state = \
4052 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4053 for name, values in svalues.items()))
4054 for storage, svalues in new_disk_state.items())
4057 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4060 all_instances = self.cfg.GetAllInstancesInfo().values()
4062 for group in self.cfg.GetAllNodeGroupsInfo().values():
4063 instances = frozenset([inst for inst in all_instances
4064 if compat.any(node in group.members
4065 for node in inst.all_nodes)])
4066 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4067 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4069 new_ipolicy, instances)
4071 violations.update(new)
4074 self.LogWarning("After the ipolicy change the following instances"
4075 " violate them: %s",
4076 utils.CommaJoin(utils.NiceSort(violations)))
4078 if self.op.nicparams:
4079 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4080 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4081 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4084 # check all instances for consistency
4085 for instance in self.cfg.GetAllInstancesInfo().values():
4086 for nic_idx, nic in enumerate(instance.nics):
4087 params_copy = copy.deepcopy(nic.nicparams)
4088 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4090 # check parameter syntax
4092 objects.NIC.CheckParameterSyntax(params_filled)
4093 except errors.ConfigurationError, err:
4094 nic_errors.append("Instance %s, nic/%d: %s" %
4095 (instance.name, nic_idx, err))
4097 # if we're moving instances to routed, check that they have an ip
4098 target_mode = params_filled[constants.NIC_MODE]
4099 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4100 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4101 " address" % (instance.name, nic_idx))
4103 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4104 "\n".join(nic_errors))
4106 # hypervisor list/parameters
4107 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4108 if self.op.hvparams:
4109 for hv_name, hv_dict in self.op.hvparams.items():
4110 if hv_name not in self.new_hvparams:
4111 self.new_hvparams[hv_name] = hv_dict
4113 self.new_hvparams[hv_name].update(hv_dict)
4115 # disk template parameters
4116 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4117 if self.op.diskparams:
4118 for dt_name, dt_params in self.op.diskparams.items():
4119 if dt_name not in self.new_diskparams:
4120 self.new_diskparams[dt_name] = dt_params
4122 self.new_diskparams[dt_name].update(dt_params)
4124 # os hypervisor parameters
4125 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4127 for os_name, hvs in self.op.os_hvp.items():
4128 if os_name not in self.new_os_hvp:
4129 self.new_os_hvp[os_name] = hvs
4131 for hv_name, hv_dict in hvs.items():
4132 if hv_name not in self.new_os_hvp[os_name]:
4133 self.new_os_hvp[os_name][hv_name] = hv_dict
4135 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4138 self.new_osp = objects.FillDict(cluster.osparams, {})
4139 if self.op.osparams:
4140 for os_name, osp in self.op.osparams.items():
4141 if os_name not in self.new_osp:
4142 self.new_osp[os_name] = {}
4144 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4147 if not self.new_osp[os_name]:
4148 # we removed all parameters
4149 del self.new_osp[os_name]
4151 # check the parameter validity (remote check)
4152 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4153 os_name, self.new_osp[os_name])
4155 # changes to the hypervisor list
4156 if self.op.enabled_hypervisors is not None:
4157 self.hv_list = self.op.enabled_hypervisors
4158 for hv in self.hv_list:
4159 # if the hypervisor doesn't already exist in the cluster
4160 # hvparams, we initialize it to empty, and then (in both
4161 # cases) we make sure to fill the defaults, as we might not
4162 # have a complete defaults list if the hypervisor wasn't
4164 if hv not in new_hvp:
4166 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4167 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4169 self.hv_list = cluster.enabled_hypervisors
4171 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4172 # either the enabled list has changed, or the parameters have, validate
4173 for hv_name, hv_params in self.new_hvparams.items():
4174 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4175 (self.op.enabled_hypervisors and
4176 hv_name in self.op.enabled_hypervisors)):
4177 # either this is a new hypervisor, or its parameters have changed
4178 hv_class = hypervisor.GetHypervisor(hv_name)
4179 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4180 hv_class.CheckParameterSyntax(hv_params)
4181 _CheckHVParams(self, node_list, hv_name, hv_params)
4184 # no need to check any newly-enabled hypervisors, since the
4185 # defaults have already been checked in the above code-block
4186 for os_name, os_hvp in self.new_os_hvp.items():
4187 for hv_name, hv_params in os_hvp.items():
4188 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4189 # we need to fill in the new os_hvp on top of the actual hv_p
4190 cluster_defaults = self.new_hvparams.get(hv_name, {})
4191 new_osp = objects.FillDict(cluster_defaults, hv_params)
4192 hv_class = hypervisor.GetHypervisor(hv_name)
4193 hv_class.CheckParameterSyntax(new_osp)
4194 _CheckHVParams(self, node_list, hv_name, new_osp)
4196 if self.op.default_iallocator:
4197 alloc_script = utils.FindFile(self.op.default_iallocator,
4198 constants.IALLOCATOR_SEARCH_PATH,
4200 if alloc_script is None:
4201 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4202 " specified" % self.op.default_iallocator,
4205 def Exec(self, feedback_fn):
4206 """Change the parameters of the cluster.
4209 if self.op.vg_name is not None:
4210 new_volume = self.op.vg_name
4213 if new_volume != self.cfg.GetVGName():
4214 self.cfg.SetVGName(new_volume)
4216 feedback_fn("Cluster LVM configuration already in desired"
4217 " state, not changing")
4218 if self.op.drbd_helper is not None:
4219 new_helper = self.op.drbd_helper
4222 if new_helper != self.cfg.GetDRBDHelper():
4223 self.cfg.SetDRBDHelper(new_helper)
4225 feedback_fn("Cluster DRBD helper already in desired state,"
4227 if self.op.hvparams:
4228 self.cluster.hvparams = self.new_hvparams
4230 self.cluster.os_hvp = self.new_os_hvp
4231 if self.op.enabled_hypervisors is not None:
4232 self.cluster.hvparams = self.new_hvparams
4233 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4234 if self.op.beparams:
4235 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4236 if self.op.nicparams:
4237 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4239 self.cluster.ipolicy = self.new_ipolicy
4240 if self.op.osparams:
4241 self.cluster.osparams = self.new_osp
4242 if self.op.ndparams:
4243 self.cluster.ndparams = self.new_ndparams
4244 if self.op.diskparams:
4245 self.cluster.diskparams = self.new_diskparams
4246 if self.op.hv_state:
4247 self.cluster.hv_state_static = self.new_hv_state
4248 if self.op.disk_state:
4249 self.cluster.disk_state_static = self.new_disk_state
4251 if self.op.candidate_pool_size is not None:
4252 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4253 # we need to update the pool size here, otherwise the save will fail
4254 _AdjustCandidatePool(self, [])
4256 if self.op.maintain_node_health is not None:
4257 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4258 feedback_fn("Note: CONFD was disabled at build time, node health"
4259 " maintenance is not useful (still enabling it)")
4260 self.cluster.maintain_node_health = self.op.maintain_node_health
4262 if self.op.prealloc_wipe_disks is not None:
4263 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4265 if self.op.add_uids is not None:
4266 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4268 if self.op.remove_uids is not None:
4269 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4271 if self.op.uid_pool is not None:
4272 self.cluster.uid_pool = self.op.uid_pool
4274 if self.op.default_iallocator is not None:
4275 self.cluster.default_iallocator = self.op.default_iallocator
4277 if self.op.reserved_lvs is not None:
4278 self.cluster.reserved_lvs = self.op.reserved_lvs
4280 if self.op.use_external_mip_script is not None:
4281 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4283 def helper_os(aname, mods, desc):
4285 lst = getattr(self.cluster, aname)
4286 for key, val in mods:
4287 if key == constants.DDM_ADD:
4289 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4292 elif key == constants.DDM_REMOVE:
4296 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4298 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4300 if self.op.hidden_os:
4301 helper_os("hidden_os", self.op.hidden_os, "hidden")
4303 if self.op.blacklisted_os:
4304 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4306 if self.op.master_netdev:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 ems = self.cfg.GetUseExternalMipScript()
4309 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4310 self.cluster.master_netdev)
4311 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4313 result.Raise("Could not disable the master ip")
4314 feedback_fn("Changing master_netdev from %s to %s" %
4315 (master_params.netdev, self.op.master_netdev))
4316 self.cluster.master_netdev = self.op.master_netdev
4318 if self.op.master_netmask:
4319 master_params = self.cfg.GetMasterNetworkParameters()
4320 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4321 result = self.rpc.call_node_change_master_netmask(master_params.name,
4322 master_params.netmask,
4323 self.op.master_netmask,
4325 master_params.netdev)
4327 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4330 self.cluster.master_netmask = self.op.master_netmask
4332 self.cfg.Update(self.cluster, feedback_fn)
4334 if self.op.master_netdev:
4335 master_params = self.cfg.GetMasterNetworkParameters()
4336 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4337 self.op.master_netdev)
4338 ems = self.cfg.GetUseExternalMipScript()
4339 result = self.rpc.call_node_activate_master_ip(master_params.name,
4342 self.LogWarning("Could not re-enable the master ip on"
4343 " the master, please restart manually: %s",
4347 def _UploadHelper(lu, nodes, fname):
4348 """Helper for uploading a file and showing warnings.
4351 if os.path.exists(fname):
4352 result = lu.rpc.call_upload_file(nodes, fname)
4353 for to_node, to_result in result.items():
4354 msg = to_result.fail_msg
4356 msg = ("Copy of file %s to node %s failed: %s" %
4357 (fname, to_node, msg))
4358 lu.proc.LogWarning(msg)
4361 def _ComputeAncillaryFiles(cluster, redist):
4362 """Compute files external to Ganeti which need to be consistent.
4364 @type redist: boolean
4365 @param redist: Whether to include files which need to be redistributed
4368 # Compute files for all nodes
4370 constants.SSH_KNOWN_HOSTS_FILE,
4371 constants.CONFD_HMAC_KEY,
4372 constants.CLUSTER_DOMAIN_SECRET_FILE,
4373 constants.SPICE_CERT_FILE,
4374 constants.SPICE_CACERT_FILE,
4375 constants.RAPI_USERS_FILE,
4379 files_all.update(constants.ALL_CERT_FILES)
4380 files_all.update(ssconf.SimpleStore().GetFileList())
4382 # we need to ship at least the RAPI certificate
4383 files_all.add(constants.RAPI_CERT_FILE)
4385 if cluster.modify_etc_hosts:
4386 files_all.add(constants.ETC_HOSTS)
4388 if cluster.use_external_mip_script:
4389 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4391 # Files which are optional, these must:
4392 # - be present in one other category as well
4393 # - either exist or not exist on all nodes of that category (mc, vm all)
4395 constants.RAPI_USERS_FILE,
4398 # Files which should only be on master candidates
4402 files_mc.add(constants.CLUSTER_CONF_FILE)
4404 # Files which should only be on VM-capable nodes
4405 files_vm = set(filename
4406 for hv_name in cluster.enabled_hypervisors
4407 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4409 files_opt |= set(filename
4410 for hv_name in cluster.enabled_hypervisors
4411 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413 # Filenames in each category must be unique
4414 all_files_set = files_all | files_mc | files_vm
4415 assert (len(all_files_set) ==
4416 sum(map(len, [files_all, files_mc, files_vm]))), \
4417 "Found file listed in more than one file list"
4419 # Optional files must be present in one other category
4420 assert all_files_set.issuperset(files_opt), \
4421 "Optional file not in a different required list"
4423 return (files_all, files_opt, files_mc, files_vm)
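# Illustrative restatement of the invariants asserted above, as a standalone
# predicate over arbitrary sets; not used anywhere in the module.
def _AncillaryFileInvariantsHoldSketch(files_all, files_opt, files_mc,
                                       files_vm):
  """True iff the categories are disjoint and optional files are covered."""
  categories = [files_all, files_mc, files_vm]
  union = set()
  for cat in categories:
    union |= set(cat)
  disjoint = len(union) == sum(len(cat) for cat in categories)
  return disjoint and union.issuperset(files_opt)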
4426 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4427 """Distribute additional files which are part of the cluster configuration.
4429 ConfigWriter takes care of distributing the config and ssconf files, but
4430 there are more files which should be distributed to all nodes. This function
4431 makes sure those are copied.
4433 @param lu: calling logical unit
4434 @param additional_nodes: list of nodes not in the config to distribute to
4435 @type additional_vm: boolean
4436 @param additional_vm: whether the additional nodes are vm-capable or not
4439 # Gather target nodes
4440 cluster = lu.cfg.GetClusterInfo()
4441 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4443 online_nodes = lu.cfg.GetOnlineNodeList()
4444 online_set = frozenset(online_nodes)
4445 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4447 if additional_nodes is not None:
4448 online_nodes.extend(additional_nodes)
4450 vm_nodes.extend(additional_nodes)
4452 # Never distribute to master node
4453 for nodelist in [online_nodes, vm_nodes]:
4454 if master_info.name in nodelist:
4455 nodelist.remove(master_info.name)
4458 (files_all, _, files_mc, files_vm) = \
4459 _ComputeAncillaryFiles(cluster, True)
4461 # Never re-distribute configuration file from here
4462 assert not (constants.CLUSTER_CONF_FILE in files_all or
4463 constants.CLUSTER_CONF_FILE in files_vm)
4464 assert not files_mc, "Master candidates not handled in this function"
4467 (online_nodes, files_all),
4468 (vm_nodes, files_vm),
4472 for (node_list, files) in filemap:
4474 _UploadHelper(lu, node_list, fname)
4477 class LUClusterRedistConf(NoHooksLU):
4478 """Force the redistribution of cluster configuration.
4480 This is a very simple LU.
4485 def ExpandNames(self):
4486 self.needed_locks = {
4487 locking.LEVEL_NODE: locking.ALL_SET,
4489 self.share_locks[locking.LEVEL_NODE] = 1
4491 def Exec(self, feedback_fn):
4492 """Redistribute the configuration.
4495 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4496 _RedistributeAncillaryFiles(self)
4499 class LUClusterActivateMasterIp(NoHooksLU):
4500 """Activate the master IP on the master node.
4503 def Exec(self, feedback_fn):
4504 """Activate the master IP.
4507 master_params = self.cfg.GetMasterNetworkParameters()
4508 ems = self.cfg.GetUseExternalMipScript()
4509 result = self.rpc.call_node_activate_master_ip(master_params.name,
4511 result.Raise("Could not activate the master IP")
4514 class LUClusterDeactivateMasterIp(NoHooksLU):
4515 """Deactivate the master IP on the master node.
4518 def Exec(self, feedback_fn):
4519 """Deactivate the master IP.
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4526 result.Raise("Could not deactivate the master IP")
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530 """Sleep and poll for an instance's disk to sync.
4533 if not instance.disks or disks is not None and not disks:
4536 disks = _ExpandCheckDisks(instance, disks)
4539 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4541 node = instance.primary_node
4544 lu.cfg.SetDiskID(dev, node)
4546 # TODO: Convert to utils.Retry
4549 degr_retries = 10 # in seconds, as we sleep 1 second each time
4553 cumul_degraded = False
4554 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555 msg = rstats.fail_msg
4557 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4560 raise errors.RemoteError("Can't contact node %s for mirror data,"
4561 " aborting." % node)
4564 rstats = rstats.payload
4566 for i, mstat in enumerate(rstats):
4568 lu.LogWarning("Can't compute data for node %s/%s",
4569 node, disks[i].iv_name)
4572 cumul_degraded = (cumul_degraded or
4573 (mstat.is_degraded and mstat.sync_percent is None))
4574 if mstat.sync_percent is not None:
4576 if mstat.estimated_time is not None:
4577 rem_time = ("%s remaining (estimated)" %
4578 utils.FormatSeconds(mstat.estimated_time))
4579 max_time = mstat.estimated_time
4581 rem_time = "no time estimate"
4582 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583 (disks[i].iv_name, mstat.sync_percent, rem_time))
4585 # if we're done but degraded, let's do a few small retries, to
4586 # make sure we see a stable and not transient situation; therefore
4587 # we force restart of the loop
4588 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589 logging.info("Degraded disks found, %d retries left", degr_retries)
4597 time.sleep(min(60, max_time))
4600 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601 return not cumul_degraded
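# Simplified model of the polling loop above, with the RPC calls and logging
# stripped out so the control flow (including the degr_retries behaviour) is
# easier to follow.  poll_fn and sleep_fn are hypothetical injected callables;
# this sketch is not used by the module.
def _WaitForSyncSketch(poll_fn, sleep_fn, degr_retries=10):
  """poll_fn() -> (done, degraded); returns True when sync finished cleanly."""
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and degr_retries > 0:
      # transiently degraded at the end: retry a few times before giving up
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(5)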
4604 def _BlockdevFind(lu, node, dev, instance):
4605 """Wrapper around call_blockdev_find to annotate diskparams.
4607 @param lu: A reference to the lu object
4608 @param node: The node to call out
4609 @param dev: The device to find
4610 @param instance: The instance object the device belongs to
4611 @returns The result of the rpc call
4614 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4615 return lu.rpc.call_blockdev_find(node, disk)
4618 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4619 """Wrapper around L{_CheckDiskConsistencyInner}.
4622 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4623 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4629 """Check that mirrors are not degraded.
4631 @attention: The device has to be annotated already.
4633 The ldisk parameter, if True, will change the test from the
4634 is_degraded attribute (which represents overall non-ok status for
4635 the device(s)) to the ldisk (representing the local storage status).
4638 lu.cfg.SetDiskID(dev, node)
4642 if on_primary or dev.AssembleOnSecondary():
4643 rstats = lu.rpc.call_blockdev_find(node, dev)
4644 msg = rstats.fail_msg
4646 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4648 elif not rstats.payload:
4649 lu.LogWarning("Can't find disk on node %s", node)
4653 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4655 result = result and not rstats.payload.is_degraded
4658 for child in dev.children:
4659 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4732 self.nodes.append(node)
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4755 node_entry.append((constants.RS_UNAVAIL, None))
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4770 self._CheckPayload(result)
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4774 node_entry.append((constants.RS_NODATA, None))
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
4784 if self.op.command == constants.OOB_POWER_ON:
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
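# For reference, a boolean restatement of the payload shapes _CheckPayload
# above accepts for each out-of-band command; illustrative only and not used
# by the LU.
def _OobPayloadLooksValidSketch(command, payload):
  """Returns True if the payload shape matches the expectations above."""
  if command == constants.OOB_HEALTH:
    return (isinstance(payload, list) and
            all(status in constants.OOB_STATUSES
                for (_, status) in payload))
  if command == constants.OOB_POWER_STATUS:
    return isinstance(payload, dict)
  if command in (constants.OOB_POWER_ON, constants.OOB_POWER_OFF,
                 constants.OOB_POWER_CYCLE):
    return payload is None
  return True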
4845 class _OsQuery(_QueryBase):
4846 FIELDS = query.OS_FIELDS
4848 def ExpandNames(self, lu):
4849 # Lock all nodes in shared mode
4850 # Temporary removal of locks, should be reverted later
4851 # TODO: reintroduce locks when they are lighter-weight
4852 lu.needed_locks = {}
4853 #self.share_locks[locking.LEVEL_NODE] = 1
4854 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 # The following variables interact with _QueryBase._GetNames
4858 self.wanted = self.names
4860 self.wanted = locking.ALL_SET
4862 self.do_locking = self.use_locking
4864 def DeclareLocks(self, lu, level):
4868 def _DiagnoseByOS(rlist):
4869 """Remaps a per-node return list into an a per-os per-node dictionary
4871 @param rlist: a map with node names as keys and OS objects as values
4874 @return: a dictionary with osnames as keys and as value another
4875 map, with nodes as keys and tuples of (path, status, diagnose,
4876 variants, parameters, api_versions) as values, eg::
4878 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4879 (/srv/..., False, "invalid api")],
4880 "node2": [(/srv/..., True, "", [], [])]}
4885 # we build here the list of nodes that didn't fail the RPC (at RPC
4886 # level), so that nodes with a non-responding node daemon don't
4887 # make all OSes invalid
4888 good_nodes = [node_name for node_name in rlist
4889 if not rlist[node_name].fail_msg]
4890 for node_name, nr in rlist.items():
4891 if nr.fail_msg or not nr.payload:
4893 for (name, path, status, diagnose, variants,
4894 params, api_versions) in nr.payload:
4895 if name not in all_os:
4896 # build a list of nodes for this os containing empty lists
4897 # for each node in node_list
4899 for nname in good_nodes:
4900 all_os[name][nname] = []
4901 # convert params from [name, help] to (name, help)
4902 params = [tuple(v) for v in params]
4903 all_os[name][node_name].append((path, status, diagnose,
4904 variants, params, api_versions))
4907 def _GetQueryData(self, lu):
4908 """Computes the list of nodes and their attributes.
4911 # Locking is not used
4912 assert not (compat.any(lu.glm.is_owned(level)
4913 for level in locking.LEVELS
4914 if level != locking.LEVEL_CLUSTER) or
4915 self.do_locking or self.use_locking)
4917 valid_nodes = [node.name
4918 for node in lu.cfg.GetAllNodesInfo().values()
4919 if not node.offline and node.vm_capable]
4920 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4921 cluster = lu.cfg.GetClusterInfo()
4925 for (os_name, os_data) in pol.items():
4926 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4927 hidden=(os_name in cluster.hidden_os),
4928 blacklisted=(os_name in cluster.blacklisted_os))
4932 api_versions = set()
4934 for idx, osl in enumerate(os_data.values()):
4935 info.valid = bool(info.valid and osl and osl[0][1])
4939 (node_variants, node_params, node_api) = osl[0][3:6]
4942 variants.update(node_variants)
4943 parameters.update(node_params)
4944 api_versions.update(node_api)
4946 # Filter out inconsistent values
4947 variants.intersection_update(node_variants)
4948 parameters.intersection_update(node_params)
4949 api_versions.intersection_update(node_api)
4951 info.variants = list(variants)
4952 info.parameters = list(parameters)
4953 info.api_versions = list(api_versions)
4955 data[os_name] = info
4957 # Prepare data in requested order
4958 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4962 class LUOsDiagnose(NoHooksLU):
4963 """Logical unit for OS diagnose/query.
4969 def _BuildFilter(fields, names):
4970 """Builds a filter for querying OSes.
4973 name_filter = qlang.MakeSimpleFilter("name", names)
4975 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4976 # respective field is not requested
4977 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4978 for fname in ["hidden", "blacklisted"]
4979 if fname not in fields]
4980 if "valid" not in fields:
4981 status_filter.append([qlang.OP_TRUE, "valid"])
4984 status_filter.insert(0, qlang.OP_AND)
4986 status_filter = None
4988 if name_filter and status_filter:
4989 return [qlang.OP_AND, name_filter, status_filter]
4993 return status_filter
4995 def CheckArguments(self):
4996 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4997 self.op.output_fields, False)
4999 def ExpandNames(self):
5000 self.oq.ExpandNames(self)
5002 def Exec(self, feedback_fn):
5003 return self.oq.OldStyleQuery(self)
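# Illustrative only: the qlang filter that _BuildFilter above produces for
# output_fields=["name"] and names=["debian-image"], assuming the standard
# qlang operators used elsewhere in this file.  The function is a sketch and
# is not called anywhere.
def _ExampleOsFilterSketch():
  """Returns the combined name/status filter for a sample OS query."""
  name_filter = qlang.MakeSimpleFilter("name", ["debian-image"])
  status_filter = [qlang.OP_AND,
                   [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
                   [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
                   [qlang.OP_TRUE, "valid"]]
  return [qlang.OP_AND, name_filter, status_filter]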
5006 class _ExtStorageQuery(_QueryBase):
5007 FIELDS = query.EXTSTORAGE_FIELDS
5009 def ExpandNames(self, lu):
5010 # Lock all nodes in shared mode
5011 # Temporary removal of locks, should be reverted later
5012 # TODO: reintroduce locks when they are lighter-weight
5013 lu.needed_locks = {}
5014 #self.share_locks[locking.LEVEL_NODE] = 1
5015 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5017 # The following variables interact with _QueryBase._GetNames
5019 self.wanted = self.names
5021 self.wanted = locking.ALL_SET
5023 self.do_locking = self.use_locking
5025 def DeclareLocks(self, lu, level):
5029 def _DiagnoseByProvider(rlist):
5030 """Remaps a per-node return list into an a per-provider per-node dictionary
5032 @param rlist: a map with node names as keys and ExtStorage objects as values
5035 @return: a dictionary with extstorage providers as keys and as
5036 value another map, with nodes as keys and tuples of
5037 (path, status, diagnose, parameters) as values, eg::
5039 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5040 "node2": [(/srv/..., False, "missing file")]
5041 "node3": [(/srv/..., True, "", [])]
5046 # we build here the list of nodes that didn't fail the RPC (at RPC
5047 # level), so that nodes with a non-responding node daemon don't
5048 # make all OSes invalid
5049 good_nodes = [node_name for node_name in rlist
5050 if not rlist[node_name].fail_msg]
5051 for node_name, nr in rlist.items():
5052 if nr.fail_msg or not nr.payload:
5054 for (name, path, status, diagnose, params) in nr.payload:
5055 if name not in all_es:
5056 # build a list of nodes for this os containing empty lists
5057 # for each node in node_list
5059 for nname in good_nodes:
5060 all_es[name][nname] = []
5061 # convert params from [name, help] to (name, help)
5062 params = [tuple(v) for v in params]
5063 all_es[name][node_name].append((path, status, diagnose, params))
5066 def _GetQueryData(self, lu):
5067 """Computes the list of nodes and their attributes.
5070 # Locking is not used
5071 assert not (compat.any(lu.glm.is_owned(level)
5072 for level in locking.LEVELS
5073 if level != locking.LEVEL_CLUSTER) or
5074 self.do_locking or self.use_locking)
5076 valid_nodes = [node.name
5077 for node in lu.cfg.GetAllNodesInfo().values()
5078 if not node.offline and node.vm_capable]
5079 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5083 nodegroup_list = lu.cfg.GetNodeGroupList()
5085 for (es_name, es_data) in pol.items():
5086 # For every provider compute the nodegroup validity.
5087 # To do this we need to check the validity of each node in es_data
5088 # and then construct the corresponding nodegroup dict:
5089 # { nodegroup1: status
5090 # nodegroup2: status
5093 for nodegroup in nodegroup_list:
5094 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5096 nodegroup_nodes = ndgrp.members
5097 nodegroup_name = ndgrp.name
5100 for node in nodegroup_nodes:
5101 if node in valid_nodes:
5102 if es_data[node] != []:
5103 node_status = es_data[node][0][1]
5104 node_statuses.append(node_status)
5106 node_statuses.append(False)
5108 if False in node_statuses:
5109 ndgrp_data[nodegroup_name] = False
5111 ndgrp_data[nodegroup_name] = True
5113 # Compute the provider's parameters
5115 for idx, esl in enumerate(es_data.values()):
5116 valid = bool(esl and esl[0][1])
5120 node_params = esl[0][3]
5123 parameters.update(node_params)
5125 # Filter out inconsistent values
5126 parameters.intersection_update(node_params)
5128 params = list(parameters)
5130 # Now fill all the info for this provider
5131 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5132 nodegroup_status=ndgrp_data,
5135 data[es_name] = info
5137 # Prepare data in requested order
5138 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5142 class LUExtStorageDiagnose(NoHooksLU):
5143 """Logical unit for ExtStorage diagnose/query.
5148 def CheckArguments(self):
5149 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5150 self.op.output_fields, False)
5152 def ExpandNames(self):
5153 self.eq.ExpandNames(self)
5155 def Exec(self, feedback_fn):
5156 return self.eq.OldStyleQuery(self)
5159 class LUNodeRemove(LogicalUnit):
5160 """Logical unit for removing a node.
5163 HPATH = "node-remove"
5164 HTYPE = constants.HTYPE_NODE
5166 def BuildHooksEnv(self):
5171 "OP_TARGET": self.op.node_name,
5172 "NODE_NAME": self.op.node_name,
5175 def BuildHooksNodes(self):
5176 """Build hooks nodes.
5178 This doesn't run on the target node in the pre phase as a failed
5179 node would then be impossible to remove.
5182 all_nodes = self.cfg.GetNodeList()
5184 all_nodes.remove(self.op.node_name)
5187 return (all_nodes, all_nodes)
5189 def CheckPrereq(self):
5190 """Check prerequisites.
5193 - the node exists in the configuration
5194 - it does not have primary or secondary instances
5195 - it's not the master
5197 Any errors are signaled by raising errors.OpPrereqError.
5200 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5201 node = self.cfg.GetNodeInfo(self.op.node_name)
5202 assert node is not None
5204 masternode = self.cfg.GetMasterNode()
5205 if node.name == masternode:
5206 raise errors.OpPrereqError("Node is the master node, failover to another"
5207 " node is required", errors.ECODE_INVAL)
5209 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5210 if node.name in instance.all_nodes:
5211 raise errors.OpPrereqError("Instance %s is still running on the node,"
5212 " please remove first" % instance_name,
5214 self.op.node_name = node.name
5217 def Exec(self, feedback_fn):
5218 """Removes the node from the cluster.
5222 logging.info("Stopping the node daemon and removing configs from node %s",
5225 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5227 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5230 # Promote nodes to master candidate as needed
5231 _AdjustCandidatePool(self, exceptions=[node.name])
5232 self.context.RemoveNode(node.name)
5234 # Run post hooks on the node before it's removed
5235 _RunPostHook(self, node.name)
5237 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5238 msg = result.fail_msg
5240 self.LogWarning("Errors encountered on the remote node while leaving"
5241 " the cluster: %s", msg)
5243 # Remove node from our /etc/hosts
5244 if self.cfg.GetClusterInfo().modify_etc_hosts:
5245 master_node = self.cfg.GetMasterNode()
5246 result = self.rpc.call_etc_hosts_modify(master_node,
5247 constants.ETC_HOSTS_REMOVE,
5249 result.Raise("Can't update hosts file with new host data")
5250 _RedistributeAncillaryFiles(self)
5253 class _NodeQuery(_QueryBase):
5254 FIELDS = query.NODE_FIELDS
5256 def ExpandNames(self, lu):
5257 lu.needed_locks = {}
5258 lu.share_locks = _ShareAll()
5261 self.wanted = _GetWantedNodes(lu, self.names)
5263 self.wanted = locking.ALL_SET
5265 self.do_locking = (self.use_locking and
5266 query.NQ_LIVE in self.requested_data)
5269 # If any non-static field is requested we need to lock the nodes
5270 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5272 def DeclareLocks(self, lu, level):
5275 def _GetQueryData(self, lu):
5276 """Computes the list of nodes and their attributes.
5279 all_info = lu.cfg.GetAllNodesInfo()
5281 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5283 # Gather data as requested
5284 if query.NQ_LIVE in self.requested_data:
5285 # filter out non-vm_capable nodes
5286 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5288 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5289 [lu.cfg.GetHypervisorType()])
5290 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5291 for (name, nresult) in node_data.items()
5292 if not nresult.fail_msg and nresult.payload)
5296 if query.NQ_INST in self.requested_data:
5297 node_to_primary = dict([(name, set()) for name in nodenames])
5298 node_to_secondary = dict([(name, set()) for name in nodenames])
5300 inst_data = lu.cfg.GetAllInstancesInfo()
5302 for inst in inst_data.values():
5303 if inst.primary_node in node_to_primary:
5304 node_to_primary[inst.primary_node].add(inst.name)
5305 for secnode in inst.secondary_nodes:
5306 if secnode in node_to_secondary:
5307 node_to_secondary[secnode].add(inst.name)
5309 node_to_primary = None
5310 node_to_secondary = None
5312 if query.NQ_OOB in self.requested_data:
5313 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5314 for name, node in all_info.iteritems())
5318 if query.NQ_GROUP in self.requested_data:
5319 groups = lu.cfg.GetAllNodeGroupsInfo()
5323 return query.NodeQueryData([all_info[name] for name in nodenames],
5324 live_data, lu.cfg.GetMasterNode(),
5325 node_to_primary, node_to_secondary, groups,
5326 oob_support, lu.cfg.GetClusterInfo())
5329 class LUNodeQuery(NoHooksLU):
5330 """Logical unit for querying nodes.
5333 # pylint: disable=W0142
5336 def CheckArguments(self):
5337 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5338 self.op.output_fields, self.op.use_locking)
5340 def ExpandNames(self):
5341 self.nq.ExpandNames(self)
5343 def DeclareLocks(self, level):
5344 self.nq.DeclareLocks(self, level)
5346 def Exec(self, feedback_fn):
5347 return self.nq.OldStyleQuery(self)
5350 class LUNodeQueryvols(NoHooksLU):
5351 """Logical unit for getting volumes on node(s).
5355 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5356 _FIELDS_STATIC = utils.FieldSet("node")
5358 def CheckArguments(self):
5359 _CheckOutputFields(static=self._FIELDS_STATIC,
5360 dynamic=self._FIELDS_DYNAMIC,
5361 selected=self.op.output_fields)
5363 def ExpandNames(self):
5364 self.share_locks = _ShareAll()
5365 self.needed_locks = {}
5367 if not self.op.nodes:
5368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5370 self.needed_locks[locking.LEVEL_NODE] = \
5371 _GetWantedNodes(self, self.op.nodes)
5373 def Exec(self, feedback_fn):
5374 """Computes the list of nodes and their attributes.
5377 nodenames = self.owned_locks(locking.LEVEL_NODE)
5378 volumes = self.rpc.call_node_volumes(nodenames)
5380 ilist = self.cfg.GetAllInstancesInfo()
5381 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5384 for node in nodenames:
5385 nresult = volumes[node]
5388 msg = nresult.fail_msg
5390 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5393 node_vols = sorted(nresult.payload,
5394 key=operator.itemgetter("dev"))
5396 for vol in node_vols:
5398 for field in self.op.output_fields:
5401 elif field == "phys":
5405 elif field == "name":
5407 elif field == "size":
5408 val = int(float(vol["size"]))
5409 elif field == "instance":
5410 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5412 raise errors.ParameterError(field)
5413 node_output.append(str(val))
5415 output.append(node_output)
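# Standalone restatement of the per-volume row construction above; handy for
# seeing the expected field values without reading the loop.  "phys" is
# assumed to map to the payload's "dev" entry, matching the sort key used
# above.  Sketch only, not used by the LU.
def _FormatVolumeRowSketch(node, vol, vol2inst, output_fields):
  """Builds one row of strings for a single logical volume."""
  row = []
  for field in output_fields:
    if field == "node":
      val = node
    elif field == "phys":
      val = vol["dev"]
    elif field == "vg":
      val = vol["vg"]
    elif field == "name":
      val = vol["name"]
    elif field == "size":
      val = int(float(vol["size"]))
    elif field == "instance":
      val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
    else:
      raise errors.ParameterError(field)
    row.append(str(val))
  return row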
5420 class LUNodeQueryStorage(NoHooksLU):
5421 """Logical unit for getting information on storage units on node(s).
5424 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5427 def CheckArguments(self):
5428 _CheckOutputFields(static=self._FIELDS_STATIC,
5429 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5430 selected=self.op.output_fields)
5432 def ExpandNames(self):
5433 self.share_locks = _ShareAll()
5434 self.needed_locks = {}
5437 self.needed_locks[locking.LEVEL_NODE] = \
5438 _GetWantedNodes(self, self.op.nodes)
5440 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5442 def Exec(self, feedback_fn):
5443 """Computes the list of nodes and their attributes.
5446 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5448 # Always get name to sort by
5449 if constants.SF_NAME in self.op.output_fields:
5450 fields = self.op.output_fields[:]
5452 fields = [constants.SF_NAME] + self.op.output_fields
5454 # Never ask for node or type as it's only known to the LU
5455 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5456 while extra in fields:
5457 fields.remove(extra)
5459 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5460 name_idx = field_idx[constants.SF_NAME]
5462 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5463 data = self.rpc.call_storage_list(self.nodes,
5464 self.op.storage_type, st_args,
5465 self.op.name, fields)
5469 for node in utils.NiceSort(self.nodes):
5470 nresult = data[node]
5474 msg = nresult.fail_msg
5476 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5479 rows = dict([(row[name_idx], row) for row in nresult.payload])
5481 for name in utils.NiceSort(rows.keys()):
5486 for field in self.op.output_fields:
5487 if field == constants.SF_NODE:
5489 elif field == constants.SF_TYPE:
5490 val = self.op.storage_type
5491 elif field in field_idx:
5492 val = row[field_idx[field]]
5494 raise errors.ParameterError(field)
5503 class _InstanceQuery(_QueryBase):
5504 FIELDS = query.INSTANCE_FIELDS
5506 def ExpandNames(self, lu):
5507 lu.needed_locks = {}
5508 lu.share_locks = _ShareAll()
5511 self.wanted = _GetWantedInstances(lu, self.names)
5513 self.wanted = locking.ALL_SET
5515 self.do_locking = (self.use_locking and
5516 query.IQ_LIVE in self.requested_data)
5518 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5519 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5520 lu.needed_locks[locking.LEVEL_NODE] = []
5521 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5523 self.do_grouplocks = (self.do_locking and
5524 query.IQ_NODES in self.requested_data)
5526 def DeclareLocks(self, lu, level):
5528 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5529 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5531 # Lock all groups used by instances optimistically; this requires going
5532 # via the node before it's locked, requiring verification later on
5533 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5535 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5536 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5537 elif level == locking.LEVEL_NODE:
5538 lu._LockInstancesNodes() # pylint: disable=W0212
5541 def _CheckGroupLocks(lu):
5542 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5543 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5545 # Check if node groups for locked instances are still correct
5546 for instance_name in owned_instances:
5547 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5549 def _GetQueryData(self, lu):
5550 """Computes the list of instances and their attributes.
5553 if self.do_grouplocks:
5554 self._CheckGroupLocks(lu)
5556 cluster = lu.cfg.GetClusterInfo()
5557 all_info = lu.cfg.GetAllInstancesInfo()
5559 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5561 instance_list = [all_info[name] for name in instance_names]
5562 nodes = frozenset(itertools.chain(*(inst.all_nodes
5563 for inst in instance_list)))
5564 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5567 wrongnode_inst = set()
5569 # Gather data as requested
5570 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5572 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5574 result = node_data[name]
5576 # offline nodes will be in both lists
5577 assert result.fail_msg
5578 offline_nodes.append(name)
5580 bad_nodes.append(name)
5581 elif result.payload:
5582 for inst in result.payload:
5583 if inst in all_info:
5584 if all_info[inst].primary_node == name:
5585 live_data.update(result.payload)
5587 wrongnode_inst.add(inst)
5589 # orphan instance; we don't list it here as we don't
5590 # handle this case yet in the output of instance listing
5591 logging.warning("Orphan instance '%s' found on node %s",
5593 # else no instance is alive
5597 if query.IQ_DISKUSAGE in self.requested_data:
5598 disk_usage = dict((inst.name,
5599 _ComputeDiskSize(inst.disk_template,
5600 [{constants.IDISK_SIZE: disk.size}
5601 for disk in inst.disks]))
5602 for inst in instance_list)
5606 if query.IQ_CONSOLE in self.requested_data:
5608 for inst in instance_list:
5609 if inst.name in live_data:
5610 # Instance is running
5611 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5613 consinfo[inst.name] = None
5614 assert set(consinfo.keys()) == set(instance_names)
5618 if query.IQ_NODES in self.requested_data:
5619 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5621 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5622 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5623 for uuid in set(map(operator.attrgetter("group"),
5629 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5630 disk_usage, offline_nodes, bad_nodes,
5631 live_data, wrongnode_inst, consinfo,
5635 class LUQuery(NoHooksLU):
5636 """Query for resources/items of a certain kind.
5639 # pylint: disable=W0142
5642 def CheckArguments(self):
5643 qcls = _GetQueryImplementation(self.op.what)
5645 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5647 def ExpandNames(self):
5648 self.impl.ExpandNames(self)
5650 def DeclareLocks(self, level):
5651 self.impl.DeclareLocks(self, level)
5653 def Exec(self, feedback_fn):
5654 return self.impl.NewStyleQuery(self)
5657 class LUQueryFields(NoHooksLU):
5658 """Query for resources/items of a certain kind.
5661 # pylint: disable=W0142
5664 def CheckArguments(self):
5665 self.qcls = _GetQueryImplementation(self.op.what)
5667 def ExpandNames(self):
5668 self.needed_locks = {}
5670 def Exec(self, feedback_fn):
5671 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5674 class LUNodeModifyStorage(NoHooksLU):
5675 """Logical unit for modifying a storage volume on a node.
5680 def CheckArguments(self):
5681 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5683 storage_type = self.op.storage_type
5686 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5688 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5689 " modified" % storage_type,
5692 diff = set(self.op.changes.keys()) - modifiable
5694 raise errors.OpPrereqError("The following fields can not be modified for"
5695 " storage units of type '%s': %r" %
5696 (storage_type, list(diff)),
5699 def ExpandNames(self):
5700 self.needed_locks = {
5701 locking.LEVEL_NODE: self.op.node_name,
5704 def Exec(self, feedback_fn):
5705 """Computes the list of nodes and their attributes.
5708 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5709 result = self.rpc.call_storage_modify(self.op.node_name,
5710 self.op.storage_type, st_args,
5711 self.op.name, self.op.changes)
5712 result.Raise("Failed to modify storage unit '%s' on %s" %
5713 (self.op.name, self.op.node_name))
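# Illustrative example (the exact field constants are assumptions based on the
# constants module): for an LVM physical volume the typical modifiable field
# is the allocatable flag, e.g.
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})
# CheckArguments() rejects any field not listed in MODIFIABLE_STORAGE_FIELDS.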
5716 class LUNodeAdd(LogicalUnit):
5717 """Logical unit for adding node to the cluster.
5721 HTYPE = constants.HTYPE_NODE
5722 _NFLAGS = ["master_capable", "vm_capable"]
5724 def CheckArguments(self):
5725 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5726 # validate/normalize the node name
5727 self.hostname = netutils.GetHostname(name=self.op.node_name,
5728 family=self.primary_ip_family)
5729 self.op.node_name = self.hostname.name
5731 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5732 raise errors.OpPrereqError("Cannot readd the master node",
5735 if self.op.readd and self.op.group:
5736 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5737 " being readded", errors.ECODE_INVAL)
5739 def BuildHooksEnv(self):
5742 This will run on all nodes before, and on all nodes + the new node after.
5746 "OP_TARGET": self.op.node_name,
5747 "NODE_NAME": self.op.node_name,
5748 "NODE_PIP": self.op.primary_ip,
5749 "NODE_SIP": self.op.secondary_ip,
5750 "MASTER_CAPABLE": str(self.op.master_capable),
5751 "VM_CAPABLE": str(self.op.vm_capable),
5754 def BuildHooksNodes(self):
5755 """Build hooks nodes.
5758 # Exclude added node
5759 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5760 post_nodes = pre_nodes + [self.op.node_name, ]
5762 return (pre_nodes, post_nodes)
5764 def CheckPrereq(self):
5765 """Check prerequisites.
5768 - the new node is not already in the config
5770 - its parameters (single/dual homed) match the cluster
5772 Any errors are signaled by raising errors.OpPrereqError.
5776 hostname = self.hostname
5777 node = hostname.name
5778 primary_ip = self.op.primary_ip = hostname.ip
5779 if self.op.secondary_ip is None:
5780 if self.primary_ip_family == netutils.IP6Address.family:
5781 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5782 " IPv4 address must be given as secondary",
5784 self.op.secondary_ip = primary_ip
5786 secondary_ip = self.op.secondary_ip
5787 if not netutils.IP4Address.IsValid(secondary_ip):
5788 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5789 " address" % secondary_ip, errors.ECODE_INVAL)
5791 node_list = cfg.GetNodeList()
5792 if not self.op.readd and node in node_list:
5793 raise errors.OpPrereqError("Node %s is already in the configuration" %
5794 node, errors.ECODE_EXISTS)
5795 elif self.op.readd and node not in node_list:
5796 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5799 self.changed_primary_ip = False
5801 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5802 if self.op.readd and node == existing_node_name:
5803 if existing_node.secondary_ip != secondary_ip:
5804 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5805 " address configuration as before",
5807 if existing_node.primary_ip != primary_ip:
5808 self.changed_primary_ip = True
5812 if (existing_node.primary_ip == primary_ip or
5813 existing_node.secondary_ip == primary_ip or
5814 existing_node.primary_ip == secondary_ip or
5815 existing_node.secondary_ip == secondary_ip):
5816 raise errors.OpPrereqError("New node ip address(es) conflict with"
5817 " existing node %s" % existing_node.name,
5818 errors.ECODE_NOTUNIQUE)
5820 # After this 'if' block, None is no longer a valid value for the
5821 # _capable op attributes
5823 old_node = self.cfg.GetNodeInfo(node)
5824 assert old_node is not None, "Can't retrieve locked node %s" % node
5825 for attr in self._NFLAGS:
5826 if getattr(self.op, attr) is None:
5827 setattr(self.op, attr, getattr(old_node, attr))
5829 for attr in self._NFLAGS:
5830 if getattr(self.op, attr) is None:
5831 setattr(self.op, attr, True)
5833 if self.op.readd and not self.op.vm_capable:
5834 pri, sec = cfg.GetNodeInstances(node)
5836 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5837 " flag set to false, but it already holds"
5838 " instances" % node,
5841 # check that the type of the node (single versus dual homed) is the
5842 # same as for the master
5843 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5844 master_singlehomed = myself.secondary_ip == myself.primary_ip
5845 newbie_singlehomed = secondary_ip == primary_ip
5846 if master_singlehomed != newbie_singlehomed:
5847 if master_singlehomed:
5848 raise errors.OpPrereqError("The master has no secondary ip but the"
5849 " new node has one",
5852 raise errors.OpPrereqError("The master has a secondary ip but the"
5853 " new node doesn't have one",
5856 # checks reachability
5857 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5858 raise errors.OpPrereqError("Node not reachable by ping",
5859 errors.ECODE_ENVIRON)
5861 if not newbie_singlehomed:
5862 # check reachability from my secondary ip to newbie's secondary ip
5863 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5864 source=myself.secondary_ip):
5865 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5866 " based ping to node daemon port",
5867 errors.ECODE_ENVIRON)
5874 if self.op.master_capable:
5875 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5877 self.master_candidate = False
5880 self.new_node = old_node
5882 node_group = cfg.LookupNodeGroup(self.op.group)
5883 self.new_node = objects.Node(name=node,
5884 primary_ip=primary_ip,
5885 secondary_ip=secondary_ip,
5886 master_candidate=self.master_candidate,
5887 offline=False, drained=False,
5890 if self.op.ndparams:
5891 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5893 if self.op.hv_state:
5894 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5896 if self.op.disk_state:
5897 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5899 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5900 # it a property on the base class.
5901 result = rpc.DnsOnlyRunner().call_version([node])[node]
5902 result.Raise("Can't get version information from node %s" % node)
5903 if constants.PROTOCOL_VERSION == result.payload:
5904 logging.info("Communication to node %s fine, sw version %s match",
5905 node, result.payload)
5907 raise errors.OpPrereqError("Version mismatch master version %s,"
5908 " node version %s" %
5909 (constants.PROTOCOL_VERSION, result.payload),
5910 errors.ECODE_ENVIRON)
5912 def Exec(self, feedback_fn):
5913 """Adds the new node to the cluster.
5916 new_node = self.new_node
5917 node = new_node.name
5919 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5922 # We are adding a new node, so we assume it's powered
5923 new_node.powered = True
5925 # for re-adds, reset the offline/drained/master-candidate flags;
5926 # we need to reset here, otherwise offline would prevent RPC calls
5927 # later in the procedure; this also means that if the re-add
5928 # fails, we are left with a non-offlined, broken node
5930 new_node.drained = new_node.offline = False # pylint: disable=W0201
5931 self.LogInfo("Readding a node, the offline/drained flags were reset")
5932 # if we demote the node, we do cleanup later in the procedure
5933 new_node.master_candidate = self.master_candidate
5934 if self.changed_primary_ip:
5935 new_node.primary_ip = self.op.primary_ip
5937 # copy the master/vm_capable flags
5938 for attr in self._NFLAGS:
5939 setattr(new_node, attr, getattr(self.op, attr))
5941 # notify the user about any possible mc promotion
5942 if new_node.master_candidate:
5943 self.LogInfo("Node will be a master candidate")
5945 if self.op.ndparams:
5946 new_node.ndparams = self.op.ndparams
5948 new_node.ndparams = {}
5950 if self.op.hv_state:
5951 new_node.hv_state_static = self.new_hv_state
5953 if self.op.disk_state:
5954 new_node.disk_state_static = self.new_disk_state
5956 # Add node to our /etc/hosts, and add key to known_hosts
5957 if self.cfg.GetClusterInfo().modify_etc_hosts:
5958 master_node = self.cfg.GetMasterNode()
5959 result = self.rpc.call_etc_hosts_modify(master_node,
5960 constants.ETC_HOSTS_ADD,
5963 result.Raise("Can't update hosts file with new host data")
5965 if new_node.secondary_ip != new_node.primary_ip:
5966 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5969 node_verify_list = [self.cfg.GetMasterNode()]
5970 node_verify_param = {
5971 constants.NV_NODELIST: ([node], {}),
5972 # TODO: do a node-net-test as well?
5975 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5976 self.cfg.GetClusterName())
5977 for verifier in node_verify_list:
5978 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5979 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5981 for failed in nl_payload:
5982 feedback_fn("ssh/hostname verification failed"
5983 " (checking from %s): %s" %
5984 (verifier, nl_payload[failed]))
5985 raise errors.OpExecError("ssh/hostname verification failed")
5988 _RedistributeAncillaryFiles(self)
5989 self.context.ReaddNode(new_node)
5990 # make sure we redistribute the config
5991 self.cfg.Update(new_node, feedback_fn)
5992 # and make sure the new node will not have old files around
5993 if not new_node.master_candidate:
5994 result = self.rpc.call_node_demote_from_mc(new_node.name)
5995 msg = result.fail_msg
5997 self.LogWarning("Node failed to demote itself from master"
5998 " candidate status: %s" % msg)
6000 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6001 additional_vm=self.op.vm_capable)
6002 self.context.AddNode(new_node, self.proc.GetECId())
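# Illustrative summary of the flow above: check prerequisites (name/IP
# resolution, homing, reachability, protocol version), optionally update
# /etc/hosts, verify SSH/hostname connectivity from the master, redistribute
# ancillary files and finally register the node in the configuration via
# AddNode(); in the readd case the existing Node object is reused, the
# offline/drained flags are reset and ReaddNode() is called instead.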
6005 class LUNodeSetParams(LogicalUnit):
6006 """Modifies the parameters of a node.
6008 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6009 to the node role (as _ROLE_*)
6010 @cvar _R2F: a dictionary from node role to tuples of flags
6011 @cvar _FLAGS: a list of attribute names corresponding to the flags
6014 HPATH = "node-modify"
6015 HTYPE = constants.HTYPE_NODE
6017 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6019 (True, False, False): _ROLE_CANDIDATE,
6020 (False, True, False): _ROLE_DRAINED,
6021 (False, False, True): _ROLE_OFFLINE,
6022 (False, False, False): _ROLE_REGULAR,
6024 _R2F = dict((v, k) for k, v in _F2R.items())
6025 _FLAGS = ["master_candidate", "drained", "offline"]
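# Illustrative example of the role mapping above: draining a node that is
# currently a master candidate changes its flag tuple from
# (True, False, False) to (False, True, False), i.e. from _ROLE_CANDIDATE to
# _ROLE_DRAINED; _R2F is the inverse mapping and is used in Exec() to turn
# the computed new role back into concrete flag values.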
6027 def CheckArguments(self):
6028 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6029 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6030 self.op.master_capable, self.op.vm_capable,
6031 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6033 if all_mods.count(None) == len(all_mods):
6034 raise errors.OpPrereqError("Please pass at least one modification",
6036 if all_mods.count(True) > 1:
6037 raise errors.OpPrereqError("Can't set the node into more than one"
6038 " state at the same time",
6041 # Boolean value that tells us whether we might be demoting from MC
6042 self.might_demote = (self.op.master_candidate == False or
6043 self.op.offline == True or
6044 self.op.drained == True or
6045 self.op.master_capable == False)
6047 if self.op.secondary_ip:
6048 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6049 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6050 " address" % self.op.secondary_ip,
6053 self.lock_all = self.op.auto_promote and self.might_demote
6054 self.lock_instances = self.op.secondary_ip is not None
6056 def _InstanceFilter(self, instance):
6057 """Filter for getting affected instances.
6060 return (instance.disk_template in constants.DTS_INT_MIRROR and
6061 self.op.node_name in instance.all_nodes)
6063 def ExpandNames(self):
6065 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6067 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6069 # Since modifying a node can have severe effects on currently running
6070 # operations, the resource lock is acquired at least in shared mode
6071 self.needed_locks[locking.LEVEL_NODE_RES] = \
6072 self.needed_locks[locking.LEVEL_NODE]
6074 # Get node resource and instance locks in shared mode; they are not used
6075 # for anything but read-only access
6076 self.share_locks[locking.LEVEL_NODE_RES] = 1
6077 self.share_locks[locking.LEVEL_INSTANCE] = 1
6079 if self.lock_instances:
6080 self.needed_locks[locking.LEVEL_INSTANCE] = \
6081 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6083 def BuildHooksEnv(self):
6086 This runs on the master node.
6090 "OP_TARGET": self.op.node_name,
6091 "MASTER_CANDIDATE": str(self.op.master_candidate),
6092 "OFFLINE": str(self.op.offline),
6093 "DRAINED": str(self.op.drained),
6094 "MASTER_CAPABLE": str(self.op.master_capable),
6095 "VM_CAPABLE": str(self.op.vm_capable),
6098 def BuildHooksNodes(self):
6099 """Build hooks nodes.
6102 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6105 def CheckPrereq(self):
6106 """Check prerequisites.
6108 This only checks the instance list against the existing names.
6111 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6113 if self.lock_instances:
6114 affected_instances = \
6115 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6117 # Verify instance locks
6118 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6119 wanted_instances = frozenset(affected_instances.keys())
6120 if wanted_instances - owned_instances:
6121 raise errors.OpPrereqError("Instances affected by changing node %s's"
6122 " secondary IP address have changed since"
6123 " locks were acquired, wanted '%s', have"
6124 " '%s'; retry the operation" %
6126 utils.CommaJoin(wanted_instances),
6127 utils.CommaJoin(owned_instances)),
6130 affected_instances = None
6132 if (self.op.master_candidate is not None or
6133 self.op.drained is not None or
6134 self.op.offline is not None):
6135 # we can't change the master's node flags
6136 if self.op.node_name == self.cfg.GetMasterNode():
6137 raise errors.OpPrereqError("The master role can be changed"
6138 " only via master-failover",
6141 if self.op.master_candidate and not node.master_capable:
6142 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6143 " it a master candidate" % node.name,
6146 if self.op.vm_capable == False:
6147 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6149 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6150 " the vm_capable flag" % node.name,
6153 if node.master_candidate and self.might_demote and not self.lock_all:
6154 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6155 # check if, after removing the current node, we would be short of master candidates
6157 (mc_remaining, mc_should, _) = \
6158 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6159 if mc_remaining < mc_should:
6160 raise errors.OpPrereqError("Not enough master candidates, please"
6161 " pass auto promote option to allow"
6162 " promotion (--auto-promote or RAPI"
6163 " auto_promote=True)", errors.ECODE_STATE)
6165 self.old_flags = old_flags = (node.master_candidate,
6166 node.drained, node.offline)
6167 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6168 self.old_role = old_role = self._F2R[old_flags]
6170 # Check for ineffective changes
6171 for attr in self._FLAGS:
6172 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
6173 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6174 setattr(self.op, attr, None)
6176 # Past this point, any flag change to False means a transition
6177 # away from the respective state, as only real changes are kept
6179 # TODO: We might query the real power state if it supports OOB
6180 if _SupportsOob(self.cfg, node):
6181 if self.op.offline is False and not (node.powered or
6182 self.op.powered == True):
6183 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6184 " offline status can be reset") %
6186 elif self.op.powered is not None:
6187 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6188 " as it does not support out-of-band"
6189 " handling") % self.op.node_name)
6191 # If the node is being de-offlined or un-drained, we'll MC ourselves if needed
6192 if (self.op.drained == False or self.op.offline == False or
6193 (self.op.master_capable and not node.master_capable)):
6194 if _DecideSelfPromotion(self):
6195 self.op.master_candidate = True
6196 self.LogInfo("Auto-promoting node to master candidate")
6198 # If we're no longer master capable, we'll demote ourselves from MC
6199 if self.op.master_capable == False and node.master_candidate:
6200 self.LogInfo("Demoting from master candidate")
6201 self.op.master_candidate = False
6204 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6205 if self.op.master_candidate:
6206 new_role = self._ROLE_CANDIDATE
6207 elif self.op.drained:
6208 new_role = self._ROLE_DRAINED
6209 elif self.op.offline:
6210 new_role = self._ROLE_OFFLINE
6211 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6212 # False is still in new flags, which means we're un-setting (the
6213 # current) flag
6214 new_role = self._ROLE_REGULAR
6215 else: # no new flags, nothing, keep old role
6218 self.new_role = new_role
6220 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6221 # Trying to transition out of offline status
6222 result = self.rpc.call_version([node.name])[node.name]
6224 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6225 " to report its version: %s" %
6226 (node.name, result.fail_msg),
6229 self.LogWarning("Transitioning node from offline to online state"
6230 " without using re-add. Please make sure the node"
6233 if self.op.secondary_ip:
6234 # Ok even without locking, because this can't be changed by any LU
6235 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6236 master_singlehomed = master.secondary_ip == master.primary_ip
6237 if master_singlehomed and self.op.secondary_ip:
6238 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6239 " homed cluster", errors.ECODE_INVAL)
6241 assert not (frozenset(affected_instances) -
6242 self.owned_locks(locking.LEVEL_INSTANCE))
6245 if affected_instances:
6246 raise errors.OpPrereqError("Cannot change secondary IP address:"
6247 " offline node has instances (%s)"
6248 " configured to use it" %
6249 utils.CommaJoin(affected_instances.keys()))
6251 # On online nodes, check that no instances are running, and that
6252 # the node has the new ip and we can reach it.
6253 for instance in affected_instances.values():
6254 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6255 msg="cannot change secondary ip")
6257 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6258 if master.name != node.name:
6259 # check reachability from master secondary ip to new secondary ip
6260 if not netutils.TcpPing(self.op.secondary_ip,
6261 constants.DEFAULT_NODED_PORT,
6262 source=master.secondary_ip):
6263 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6264 " based ping to node daemon port",
6265 errors.ECODE_ENVIRON)
6267 if self.op.ndparams:
6268 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6269 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6270 self.new_ndparams = new_ndparams
6272 if self.op.hv_state:
6273 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6274 self.node.hv_state_static)
6276 if self.op.disk_state:
6277 self.new_disk_state = \
6278 _MergeAndVerifyDiskState(self.op.disk_state,
6279 self.node.disk_state_static)
6281 def Exec(self, feedback_fn):
6286 old_role = self.old_role
6287 new_role = self.new_role
6291 if self.op.ndparams:
6292 node.ndparams = self.new_ndparams
6294 if self.op.powered is not None:
6295 node.powered = self.op.powered
6297 if self.op.hv_state:
6298 node.hv_state_static = self.new_hv_state
6300 if self.op.disk_state:
6301 node.disk_state_static = self.new_disk_state
6303 for attr in ["master_capable", "vm_capable"]:
6304 val = getattr(self.op, attr)
6306 setattr(node, attr, val)
6307 result.append((attr, str(val)))
6309 if new_role != old_role:
6310 # Tell the node to demote itself, if no longer MC and not offline
6311 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6312 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6314 self.LogWarning("Node failed to demote itself: %s", msg)
6316 new_flags = self._R2F[new_role]
6317 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6319 result.append((desc, str(nf)))
6320 (node.master_candidate, node.drained, node.offline) = new_flags
6322 # we locked all nodes, so we adjust the candidate pool before updating this node
6324 _AdjustCandidatePool(self, [node.name])
6326 if self.op.secondary_ip:
6327 node.secondary_ip = self.op.secondary_ip
6328 result.append(("secondary_ip", self.op.secondary_ip))
6330 # this will trigger configuration file update, if needed
6331 self.cfg.Update(node, feedback_fn)
6333 # this will trigger job queue propagation or cleanup if the mc
6334 # flag changed
6335 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6336 self.context.ReaddNode(node)
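# Illustrative example (the opcode name follows the usual LU<->Op naming
# convention): marking a master candidate as drained would be requested as
#   opcodes.OpNodeSetParams(node_name="node2.example.com", drained=True)
# which moves the role from _ROLE_CANDIDATE to _ROLE_DRAINED; Exec() then
# asks the node to demote itself from MC and, if all node locks are held,
# adjusts the candidate pool.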
6341 class LUNodePowercycle(NoHooksLU):
6342 """Powercycles a node.
6347 def CheckArguments(self):
6348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6349 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6350 raise errors.OpPrereqError("The node is the master and the force"
6351 " parameter was not set",
6354 def ExpandNames(self):
6355 """Locking for PowercycleNode.
6357 This is a last-resort option and shouldn't block on other
6358 jobs. Therefore, we grab no locks.
6361 self.needed_locks = {}
6363 def Exec(self, feedback_fn):
6367 result = self.rpc.call_node_powercycle(self.op.node_name,
6368 self.cfg.GetHypervisorType())
6369 result.Raise("Failed to schedule the reboot")
6370 return result.payload
6373 class LUClusterQuery(NoHooksLU):
6374 """Query cluster configuration.
6379 def ExpandNames(self):
6380 self.needed_locks = {}
6382 def Exec(self, feedback_fn):
6383 """Return cluster config.
6386 cluster = self.cfg.GetClusterInfo()
6389 # Filter just for enabled hypervisors
6390 for os_name, hv_dict in cluster.os_hvp.items():
6391 os_hvp[os_name] = {}
6392 for hv_name, hv_params in hv_dict.items():
6393 if hv_name in cluster.enabled_hypervisors:
6394 os_hvp[os_name][hv_name] = hv_params
6396 # Convert ip_family to ip_version
6397 primary_ip_version = constants.IP4_VERSION
6398 if cluster.primary_ip_family == netutils.IP6Address.family:
6399 primary_ip_version = constants.IP6_VERSION
6402 "software_version": constants.RELEASE_VERSION,
6403 "protocol_version": constants.PROTOCOL_VERSION,
6404 "config_version": constants.CONFIG_VERSION,
6405 "os_api_version": max(constants.OS_API_VERSIONS),
6406 "export_version": constants.EXPORT_VERSION,
6407 "architecture": runtime.GetArchInfo(),
6408 "name": cluster.cluster_name,
6409 "master": cluster.master_node,
6410 "default_hypervisor": cluster.primary_hypervisor,
6411 "enabled_hypervisors": cluster.enabled_hypervisors,
6412 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6413 for hypervisor_name in cluster.enabled_hypervisors]),
6415 "beparams": cluster.beparams,
6416 "osparams": cluster.osparams,
6417 "ipolicy": cluster.ipolicy,
6418 "nicparams": cluster.nicparams,
6419 "ndparams": cluster.ndparams,
6420 "diskparams": cluster.diskparams,
6421 "candidate_pool_size": cluster.candidate_pool_size,
6422 "master_netdev": cluster.master_netdev,
6423 "master_netmask": cluster.master_netmask,
6424 "use_external_mip_script": cluster.use_external_mip_script,
6425 "volume_group_name": cluster.volume_group_name,
6426 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6427 "file_storage_dir": cluster.file_storage_dir,
6428 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6429 "maintain_node_health": cluster.maintain_node_health,
6430 "ctime": cluster.ctime,
6431 "mtime": cluster.mtime,
6432 "uuid": cluster.uuid,
6433 "tags": list(cluster.GetTags()),
6434 "uid_pool": cluster.uid_pool,
6435 "default_iallocator": cluster.default_iallocator,
6436 "reserved_lvs": cluster.reserved_lvs,
6437 "primary_ip_version": primary_ip_version,
6438 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6439 "hidden_os": cluster.hidden_os,
6440 "blacklisted_os": cluster.blacklisted_os,
6446 class LUClusterConfigQuery(NoHooksLU):
6447 """Return configuration values.
6452 def CheckArguments(self):
6453 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6455 def ExpandNames(self):
6456 self.cq.ExpandNames(self)
6458 def DeclareLocks(self, level):
6459 self.cq.DeclareLocks(self, level)
6461 def Exec(self, feedback_fn):
6462 result = self.cq.OldStyleQuery(self)
6464 assert len(result) == 1
6469 class _ClusterQuery(_QueryBase):
6470 FIELDS = query.CLUSTER_FIELDS
6472 #: Do not sort (there is only one item)
6475 def ExpandNames(self, lu):
6476 lu.needed_locks = {}
6478 # The following variables interact with _QueryBase._GetNames
6479 self.wanted = locking.ALL_SET
6480 self.do_locking = self.use_locking
6483 raise errors.OpPrereqError("Can not use locking for cluster queries",
6486 def DeclareLocks(self, lu, level):
6489 def _GetQueryData(self, lu):
6490 """Computes the list of nodes and their attributes.
6493 # Locking is not used
6494 assert not (compat.any(lu.glm.is_owned(level)
6495 for level in locking.LEVELS
6496 if level != locking.LEVEL_CLUSTER) or
6497 self.do_locking or self.use_locking)
6499 if query.CQ_CONFIG in self.requested_data:
6500 cluster = lu.cfg.GetClusterInfo()
6502 cluster = NotImplemented
6504 if query.CQ_QUEUE_DRAINED in self.requested_data:
6505 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6507 drain_flag = NotImplemented
6509 if query.CQ_WATCHER_PAUSE in self.requested_data:
6510 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6512 watcher_pause = NotImplemented
6514 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
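# Note: NotImplemented is used above as a sentinel meaning "this piece of
# data was not requested", so the query layer can tell "not collected" apart
# from legitimate values such as None or False.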
6517 class LUInstanceActivateDisks(NoHooksLU):
6518 """Bring up an instance's disks.
6523 def ExpandNames(self):
6524 self._ExpandAndLockInstance()
6525 self.needed_locks[locking.LEVEL_NODE] = []
6526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6528 def DeclareLocks(self, level):
6529 if level == locking.LEVEL_NODE:
6530 self._LockInstancesNodes()
6532 def CheckPrereq(self):
6533 """Check prerequisites.
6535 This checks that the instance is in the cluster.
6538 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6539 assert self.instance is not None, \
6540 "Cannot retrieve locked instance %s" % self.op.instance_name
6541 _CheckNodeOnline(self, self.instance.primary_node)
6543 def Exec(self, feedback_fn):
6544 """Activate the disks.
6547 disks_ok, disks_info = \
6548 _AssembleInstanceDisks(self, self.instance,
6549 ignore_size=self.op.ignore_size)
6551 raise errors.OpExecError("Cannot activate block devices")
6556 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6557 ignore_size=False, check=True):
6558 """Prepare the block devices for an instance.
6560 This sets up the block devices on all nodes.
6562 @type lu: L{LogicalUnit}
6563 @param lu: the logical unit on whose behalf we execute
6564 @type instance: L{objects.Instance}
6565 @param instance: the instance for whose disks we assemble
6566 @type disks: list of L{objects.Disk} or None
6567 @param disks: which disks to assemble (or all, if None)
6568 @type ignore_secondaries: boolean
6569 @param ignore_secondaries: if true, errors on secondary nodes
6570 won't result in an error return from the function
6571 @type ignore_size: boolean
6572 @param ignore_size: if true, the current known size of the disk
6573 will not be used during the disk activation, useful for cases
6574 when the size is wrong
6575 @return: False if the operation failed, otherwise a list of
6576 (host, instance_visible_name, node_visible_name)
6577 with the mapping from node devices to instance devices
6582 iname = instance.name
6584 disks = _ExpandCheckDisks(instance, disks)
6586 # With the two-pass mechanism we try to reduce the window of
6587 # opportunity for the race condition of switching DRBD to primary
6588 # before handshaking occurred, but we do not eliminate it
6590 # The proper fix would be to wait (with some limits) until the
6591 # connection has been made and drbd transitions from WFConnection
6592 # into any other network-connected state (Connected, SyncTarget,
6593 # SyncSource, etc.)
6595 # 1st pass, assemble on all nodes in secondary mode
6596 for idx, inst_disk in enumerate(disks):
6597 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6599 node_disk = node_disk.Copy()
6600 node_disk.UnsetSize()
6601 lu.cfg.SetDiskID(node_disk, node)
6602 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6604 msg = result.fail_msg
6606 is_offline_secondary = (node in instance.secondary_nodes and
6608 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6609 " (is_primary=False, pass=1): %s",
6610 inst_disk.iv_name, node, msg)
6611 if not (ignore_secondaries or is_offline_secondary):
6614 # FIXME: race condition on drbd migration to primary
6616 # 2nd pass, do only the primary node
6617 for idx, inst_disk in enumerate(disks):
6620 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6621 if node != instance.primary_node:
6624 node_disk = node_disk.Copy()
6625 node_disk.UnsetSize()
6626 lu.cfg.SetDiskID(node_disk, node)
6627 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6629 msg = result.fail_msg
6631 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6632 " (is_primary=True, pass=2): %s",
6633 inst_disk.iv_name, node, msg)
6636 dev_path = result.payload
6638 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6640 # leave the disks configured for the primary node
6641 # this is a workaround that would be fixed better by
6642 # improving the logical/physical id handling
6644 lu.cfg.SetDiskID(disk, instance.primary_node)
6646 return disks_ok, device_info
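# Minimal usage sketch (mirroring _StartInstanceDisks() and
# LUInstanceActivateDisks above):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
# device_info is only meaningful when disks_ok is True.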
6649 def _StartInstanceDisks(lu, instance, force):
6650 """Start the disks of an instance.
6653 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6654 ignore_secondaries=force)
6656 _ShutdownInstanceDisks(lu, instance)
6657 if force is not None and not force:
6658 lu.proc.LogWarning("", hint="If the message above refers to a"
6660 " you can retry the operation using '--force'.")
6661 raise errors.OpExecError("Disk consistency error")
6664 class LUInstanceDeactivateDisks(NoHooksLU):
6665 """Shutdown an instance's disks.
6670 def ExpandNames(self):
6671 self._ExpandAndLockInstance()
6672 self.needed_locks[locking.LEVEL_NODE] = []
6673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6675 def DeclareLocks(self, level):
6676 if level == locking.LEVEL_NODE:
6677 self._LockInstancesNodes()
6679 def CheckPrereq(self):
6680 """Check prerequisites.
6682 This checks that the instance is in the cluster.
6685 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6686 assert self.instance is not None, \
6687 "Cannot retrieve locked instance %s" % self.op.instance_name
6689 def Exec(self, feedback_fn):
6690 """Deactivate the disks
6693 instance = self.instance
6695 _ShutdownInstanceDisks(self, instance)
6697 _SafeShutdownInstanceDisks(self, instance)
6700 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6701 """Shutdown block devices of an instance.
6703 This function checks if an instance is running, before calling
6704 _ShutdownInstanceDisks.
6707 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6708 _ShutdownInstanceDisks(lu, instance, disks=disks)
6711 def _ExpandCheckDisks(instance, disks):
6712 """Return the instance disks selected by the disks list
6714 @type disks: list of L{objects.Disk} or None
6715 @param disks: selected disks
6716 @rtype: list of L{objects.Disk}
6717 @return: selected instance disks to act on
6721 return instance.disks
6723 if not set(disks).issubset(instance.disks):
6724 raise errors.ProgrammerError("Can only act on disks belonging to the"
6729 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6730 """Shutdown block devices of an instance.
6732 This does the shutdown on all nodes of the instance.
6734 If ignore_primary is false, errors on the primary node are
6739 disks = _ExpandCheckDisks(instance, disks)
6742 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6743 lu.cfg.SetDiskID(top_disk, node)
6744 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6745 msg = result.fail_msg
6747 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6748 disk.iv_name, node, msg)
6749 if ((node == instance.primary_node and not ignore_primary) or
6750 (node != instance.primary_node and not result.offline)):
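# Note: shutdown failures are only treated as errors when they occur on the
# primary node (unless ignore_primary is set) or on an online secondary;
# failures reported for offline secondaries are merely logged as warnings.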
6755 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6756 """Checks if a node has enough free memory.
6758 This function checks if a given node has the needed amount of free
6759 memory. In case the node has less memory or we cannot get the
6760 information from the node, this function raises an OpPrereqError
6763 @type lu: C{LogicalUnit}
6764 @param lu: a logical unit from which we get configuration data
6766 @param node: the node to check
6767 @type reason: C{str}
6768 @param reason: string to use in the error message
6769 @type requested: C{int}
6770 @param requested: the amount of memory in MiB to check for
6771 @type hypervisor_name: C{str}
6772 @param hypervisor_name: the hypervisor to ask for memory stats
6774 @return: node current free memory
6775 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6776 we cannot check the node
6779 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6780 nodeinfo[node].Raise("Can't get data from node %s" % node,
6781 prereq=True, ecode=errors.ECODE_ENVIRON)
6782 (_, _, (hv_info, )) = nodeinfo[node].payload
6784 free_mem = hv_info.get("memory_free", None)
6785 if not isinstance(free_mem, int):
6786 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6787 " was '%s'" % (node, free_mem),
6788 errors.ECODE_ENVIRON)
6789 if requested > free_mem:
6790 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6791 " needed %s MiB, available %s MiB" %
6792 (node, reason, requested, free_mem),
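# Usage sketch (mirroring LUInstanceStartup.CheckPrereq below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# i.e. the caller passes the amount of memory in MiB that the operation is
# about to consume, and the helper raises OpPrereqError if the node cannot
# provide it.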
6797 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6798 """Checks if nodes have enough free disk space in the all VGs.
6800 This function checks if all given nodes have the needed amount of
6801 free disk. In case any node has less disk or we cannot get the
6802 information from the node, this function raises an OpPrereqError
6805 @type lu: C{LogicalUnit}
6806 @param lu: a logical unit from which we get configuration data
6807 @type nodenames: C{list}
6808 @param nodenames: the list of node names to check
6809 @type req_sizes: C{dict}
6810 @param req_sizes: the hash of vg and corresponding amount of disk in
6812 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6813 or we cannot check the node
6816 for vg, req_size in req_sizes.items():
6817 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
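# Illustrative example: req_sizes maps a volume group name to the space that
# will be needed on it, e.g. {"xenvg": 10240} to require 10 GiB on "xenvg";
# each entry is checked separately via _CheckNodesFreeDiskOnVG().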
6820 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6821 """Checks if nodes have enough free disk space in the specified VG.
6823 This function checks if all given nodes have the needed amount of
6824 free disk. In case any node has less disk or we cannot get the
6825 information from the node, this function raises an OpPrereqError
6828 @type lu: C{LogicalUnit}
6829 @param lu: a logical unit from which we get configuration data
6830 @type nodenames: C{list}
6831 @param nodenames: the list of node names to check
6833 @param vg: the volume group to check
6834 @type requested: C{int}
6835 @param requested: the amount of disk in MiB to check for
6836 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6837 or we cannot check the node
6840 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6841 for node in nodenames:
6842 info = nodeinfo[node]
6843 info.Raise("Cannot get current information from node %s" % node,
6844 prereq=True, ecode=errors.ECODE_ENVIRON)
6845 (_, (vg_info, ), _) = info.payload
6846 vg_free = vg_info.get("vg_free", None)
6847 if not isinstance(vg_free, int):
6848 raise errors.OpPrereqError("Can't compute free disk space on node"
6849 " %s for vg %s, result was '%s'" %
6850 (node, vg, vg_free), errors.ECODE_ENVIRON)
6851 if requested > vg_free:
6852 raise errors.OpPrereqError("Not enough disk space on target node %s"
6853 " vg %s: required %d MiB, available %d MiB" %
6854 (node, vg, requested, vg_free),
6858 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6859 """Checks if nodes have enough physical CPUs
6861 This function checks if all given nodes have the needed number of
6862 physical CPUs. In case any node has fewer CPUs or we cannot get the
6863 information from the node, this function raises an OpPrereqError
6866 @type lu: C{LogicalUnit}
6867 @param lu: a logical unit from which we get configuration data
6868 @type nodenames: C{list}
6869 @param nodenames: the list of node names to check
6870 @type requested: C{int}
6871 @param requested: the minimum acceptable number of physical CPUs
6872 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6873 or we cannot check the node
6876 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6877 for node in nodenames:
6878 info = nodeinfo[node]
6879 info.Raise("Cannot get current information from node %s" % node,
6880 prereq=True, ecode=errors.ECODE_ENVIRON)
6881 (_, _, (hv_info, )) = info.payload
6882 num_cpus = hv_info.get("cpu_total", None)
6883 if not isinstance(num_cpus, int):
6884 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6885 " on node %s, result was '%s'" %
6886 (node, num_cpus), errors.ECODE_ENVIRON)
6887 if requested > num_cpus:
6888 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6889 "required" % (node, num_cpus, requested),
6893 class LUInstanceStartup(LogicalUnit):
6894 """Starts an instance.
6897 HPATH = "instance-start"
6898 HTYPE = constants.HTYPE_INSTANCE
6901 def CheckArguments(self):
6903 if self.op.beparams:
6904 # fill the beparams dict
6905 objects.UpgradeBeParams(self.op.beparams)
6906 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6908 def ExpandNames(self):
6909 self._ExpandAndLockInstance()
6910 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6912 def DeclareLocks(self, level):
6913 if level == locking.LEVEL_NODE_RES:
6914 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6916 def BuildHooksEnv(self):
6919 This runs on master, primary and secondary nodes of the instance.
6923 "FORCE": self.op.force,
6926 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6930 def BuildHooksNodes(self):
6931 """Build hooks nodes.
6934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6937 def CheckPrereq(self):
6938 """Check prerequisites.
6940 This checks that the instance is in the cluster.
6943 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6944 assert self.instance is not None, \
6945 "Cannot retrieve locked instance %s" % self.op.instance_name
6948 if self.op.hvparams:
6949 # check hypervisor parameter syntax (locally)
6950 cluster = self.cfg.GetClusterInfo()
6951 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6952 filled_hvp = cluster.FillHV(instance)
6953 filled_hvp.update(self.op.hvparams)
6954 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6955 hv_type.CheckParameterSyntax(filled_hvp)
6956 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6958 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6960 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6962 if self.primary_offline and self.op.ignore_offline_nodes:
6963 self.proc.LogWarning("Ignoring offline primary node")
6965 if self.op.hvparams or self.op.beparams:
6966 self.proc.LogWarning("Overridden parameters are ignored")
6968 _CheckNodeOnline(self, instance.primary_node)
6970 bep = self.cfg.GetClusterInfo().FillBE(instance)
6971 bep.update(self.op.beparams)
6973 # check bridges existence
6974 _CheckInstanceBridgesExist(self, instance)
6976 remote_info = self.rpc.call_instance_info(instance.primary_node,
6978 instance.hypervisor)
6979 remote_info.Raise("Error checking node %s" % instance.primary_node,
6980 prereq=True, ecode=errors.ECODE_ENVIRON)
6981 if not remote_info.payload: # not running already
6982 _CheckNodeFreeMemory(self, instance.primary_node,
6983 "starting instance %s" % instance.name,
6984 bep[constants.BE_MINMEM], instance.hypervisor)
6986 def Exec(self, feedback_fn):
6987 """Start the instance.
6990 instance = self.instance
6991 force = self.op.force
6993 if not self.op.no_remember:
6994 self.cfg.MarkInstanceUp(instance.name)
6996 if self.primary_offline:
6997 assert self.op.ignore_offline_nodes
6998 self.proc.LogInfo("Primary node offline, marked instance as started")
7000 node_current = instance.primary_node
7002 _StartInstanceDisks(self, instance, force)
7005 self.rpc.call_instance_start(node_current,
7006 (instance, self.op.hvparams,
7008 self.op.startup_paused)
7009 msg = result.fail_msg
7011 _ShutdownInstanceDisks(self, instance)
7012 raise errors.OpExecError("Could not start instance: %s" % msg)
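# Note: with no_remember set the new administrative state is not recorded in
# the configuration, and when the primary node is offline (allowed only
# together with ignore_offline_nodes) the instance is merely marked as
# started; no start RPC is attempted against the offline node.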
7015 class LUInstanceReboot(LogicalUnit):
7016 """Reboot an instance.
7019 HPATH = "instance-reboot"
7020 HTYPE = constants.HTYPE_INSTANCE
7023 def ExpandNames(self):
7024 self._ExpandAndLockInstance()
7026 def BuildHooksEnv(self):
7029 This runs on master, primary and secondary nodes of the instance.
7033 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7034 "REBOOT_TYPE": self.op.reboot_type,
7035 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7038 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7042 def BuildHooksNodes(self):
7043 """Build hooks nodes.
7046 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7049 def CheckPrereq(self):
7050 """Check prerequisites.
7052 This checks that the instance is in the cluster.
7055 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7056 assert self.instance is not None, \
7057 "Cannot retrieve locked instance %s" % self.op.instance_name
7058 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7059 _CheckNodeOnline(self, instance.primary_node)
7061 # check bridges existence
7062 _CheckInstanceBridgesExist(self, instance)
7064 def Exec(self, feedback_fn):
7065 """Reboot the instance.
7068 instance = self.instance
7069 ignore_secondaries = self.op.ignore_secondaries
7070 reboot_type = self.op.reboot_type
7072 remote_info = self.rpc.call_instance_info(instance.primary_node,
7074 instance.hypervisor)
7075 remote_info.Raise("Error checking node %s" % instance.primary_node)
7076 instance_running = bool(remote_info.payload)
7078 node_current = instance.primary_node
7080 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7081 constants.INSTANCE_REBOOT_HARD]:
7082 for disk in instance.disks:
7083 self.cfg.SetDiskID(disk, node_current)
7084 result = self.rpc.call_instance_reboot(node_current, instance,
7086 self.op.shutdown_timeout)
7087 result.Raise("Could not reboot instance")
7089 if instance_running:
7090 result = self.rpc.call_instance_shutdown(node_current, instance,
7091 self.op.shutdown_timeout)
7092 result.Raise("Could not shutdown instance for full reboot")
7093 _ShutdownInstanceDisks(self, instance)
7095 self.LogInfo("Instance %s was already stopped, starting now",
7097 _StartInstanceDisks(self, instance, ignore_secondaries)
7098 result = self.rpc.call_instance_start(node_current,
7099 (instance, None, None), False)
7100 msg = result.fail_msg
7102 _ShutdownInstanceDisks(self, instance)
7103 raise errors.OpExecError("Could not start instance for"
7104 " full reboot: %s" % msg)
7106 self.cfg.MarkInstanceUp(instance.name)
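# Note: soft and hard reboots of a running instance are delegated to a single
# instance_reboot RPC, while a full reboot (or rebooting an instance that is
# not running) is implemented as shutdown + disk deactivation, followed by
# disk activation + start; in all cases the instance ends up marked as up.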
7109 class LUInstanceShutdown(LogicalUnit):
7110 """Shutdown an instance.
7113 HPATH = "instance-stop"
7114 HTYPE = constants.HTYPE_INSTANCE
7117 def ExpandNames(self):
7118 self._ExpandAndLockInstance()
7120 def BuildHooksEnv(self):
7123 This runs on master, primary and secondary nodes of the instance.
7126 env = _BuildInstanceHookEnvByObject(self, self.instance)
7127 env["TIMEOUT"] = self.op.timeout
7130 def BuildHooksNodes(self):
7131 """Build hooks nodes.
7134 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7137 def CheckPrereq(self):
7138 """Check prerequisites.
7140 This checks that the instance is in the cluster.
7143 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7144 assert self.instance is not None, \
7145 "Cannot retrieve locked instance %s" % self.op.instance_name
7147 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7149 self.primary_offline = \
7150 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7152 if self.primary_offline and self.op.ignore_offline_nodes:
7153 self.proc.LogWarning("Ignoring offline primary node")
7155 _CheckNodeOnline(self, self.instance.primary_node)
7157 def Exec(self, feedback_fn):
7158 """Shutdown the instance.
7161 instance = self.instance
7162 node_current = instance.primary_node
7163 timeout = self.op.timeout
7165 if not self.op.no_remember:
7166 self.cfg.MarkInstanceDown(instance.name)
7168 if self.primary_offline:
7169 assert self.op.ignore_offline_nodes
7170 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7172 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7173 msg = result.fail_msg
7175 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7177 _ShutdownInstanceDisks(self, instance)
7180 class LUInstanceReinstall(LogicalUnit):
7181 """Reinstall an instance.
7184 HPATH = "instance-reinstall"
7185 HTYPE = constants.HTYPE_INSTANCE
7188 def ExpandNames(self):
7189 self._ExpandAndLockInstance()
7191 def BuildHooksEnv(self):
7194 This runs on master, primary and secondary nodes of the instance.
7197 return _BuildInstanceHookEnvByObject(self, self.instance)
7199 def BuildHooksNodes(self):
7200 """Build hooks nodes.
7203 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7206 def CheckPrereq(self):
7207 """Check prerequisites.
7209 This checks that the instance is in the cluster and is not running.
7212 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7213 assert instance is not None, \
7214 "Cannot retrieve locked instance %s" % self.op.instance_name
7215 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7216 " offline, cannot reinstall")
7218 if instance.disk_template == constants.DT_DISKLESS:
7219 raise errors.OpPrereqError("Instance '%s' has no disks" %
7220 self.op.instance_name,
7222 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7224 if self.op.os_type is not None:
7226 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7227 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7228 instance_os = self.op.os_type
7230 instance_os = instance.os
7232 nodelist = list(instance.all_nodes)
7234 if self.op.osparams:
7235 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7236 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7237 self.os_inst = i_osdict # the new dict (without defaults)
7241 self.instance = instance
7243 def Exec(self, feedback_fn):
7244 """Reinstall the instance.
7247 inst = self.instance
7249 if self.op.os_type is not None:
7250 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7251 inst.os = self.op.os_type
7252 # Write to configuration
7253 self.cfg.Update(inst, feedback_fn)
7255 _StartInstanceDisks(self, inst, None)
7257 feedback_fn("Running the instance OS create scripts...")
7258 # FIXME: pass debug option from opcode to backend
7259 result = self.rpc.call_instance_os_add(inst.primary_node,
7260 (inst, self.os_inst), True,
7261 self.op.debug_level)
7262 result.Raise("Could not install OS for instance %s on node %s" %
7263 (inst.name, inst.primary_node))
7265 _ShutdownInstanceDisks(self, inst)
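# Note: the disks are activated only for the duration of the OS create
# scripts and are shut down again afterwards; the instance itself is never
# started by this LU.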
7268 class LUInstanceRecreateDisks(LogicalUnit):
7269 """Recreate an instance's missing disks.
7272 HPATH = "instance-recreate-disks"
7273 HTYPE = constants.HTYPE_INSTANCE
7276 _MODIFYABLE = frozenset([
7277 constants.IDISK_SIZE,
7278 constants.IDISK_MODE,
7281 # New or changed disk parameters may have different semantics
7282 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7283 constants.IDISK_ADOPT,
7285 # TODO: Implement support for changing the VG while recreating
7287 constants.IDISK_METAVG,
7288 constants.IDISK_PROVIDER,
7291 def CheckArguments(self):
7292 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7293 # Normalize and convert deprecated list of disk indices
7294 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7296 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7298 raise errors.OpPrereqError("Some disks have been specified more than"
7299 " once: %s" % utils.CommaJoin(duplicates),
7302 for (idx, params) in self.op.disks:
7303 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7304 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7306 raise errors.OpPrereqError("Parameters for disk %s try to change"
7307 " unmodifyable parameter(s): %s" %
7308 (idx, utils.CommaJoin(unsupported)),
7311 def ExpandNames(self):
7312 self._ExpandAndLockInstance()
7313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7315 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7316 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7318 self.needed_locks[locking.LEVEL_NODE] = []
7319 self.needed_locks[locking.LEVEL_NODE_RES] = []
7321 def DeclareLocks(self, level):
7322 if level == locking.LEVEL_NODE:
7323 # if we replace the nodes, we only need to lock the old primary,
7324 # otherwise we need to lock all nodes for disk re-creation
7325 primary_only = bool(self.op.nodes)
7326 self._LockInstancesNodes(primary_only=primary_only)
7327 elif level == locking.LEVEL_NODE_RES:
7329 self.needed_locks[locking.LEVEL_NODE_RES] = \
7330 self.needed_locks[locking.LEVEL_NODE][:]
7332 def BuildHooksEnv(self):
7335 This runs on master, primary and secondary nodes of the instance.
7338 return _BuildInstanceHookEnvByObject(self, self.instance)
7340 def BuildHooksNodes(self):
7341 """Build hooks nodes.
7344 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7347 def CheckPrereq(self):
7348 """Check prerequisites.
7350 This checks that the instance is in the cluster and is not running.
7353 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7354 assert instance is not None, \
7355 "Cannot retrieve locked instance %s" % self.op.instance_name
7357 if len(self.op.nodes) != len(instance.all_nodes):
7358 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7359 " %d replacement nodes were specified" %
7360 (instance.name, len(instance.all_nodes),
7361 len(self.op.nodes)),
7363 assert instance.disk_template != constants.DT_DRBD8 or \
7364 len(self.op.nodes) == 2
7365 assert instance.disk_template != constants.DT_PLAIN or \
7366 len(self.op.nodes) == 1
7367 primary_node = self.op.nodes[0]
7369 primary_node = instance.primary_node
7370 _CheckNodeOnline(self, primary_node)
7372 if instance.disk_template == constants.DT_DISKLESS:
7373 raise errors.OpPrereqError("Instance '%s' has no disks" %
7374 self.op.instance_name, errors.ECODE_INVAL)
7376 # if we replace nodes *and* the old primary is offline, we don't
7378 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7379 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7380 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7381 if not (self.op.nodes and old_pnode.offline):
7382 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7383 msg="cannot recreate disks")
7386 self.disks = dict(self.op.disks)
7388 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7390 maxidx = max(self.disks.keys())
7391 if maxidx >= len(instance.disks):
7392 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7395 if (self.op.nodes and
7396 sorted(self.disks.keys()) != range(len(instance.disks))):
7397 raise errors.OpPrereqError("Can't recreate disks partially and"
7398 " change the nodes at the same time",
7401 self.instance = instance
7403 def Exec(self, feedback_fn):
7404 """Recreate the disks.
7407 instance = self.instance
7409 assert (self.owned_locks(locking.LEVEL_NODE) ==
7410 self.owned_locks(locking.LEVEL_NODE_RES))
7413 mods = [] # keeps track of needed changes
7415 for idx, disk in enumerate(instance.disks):
7417 changes = self.disks[idx]
7419 # Disk should not be recreated
7423 # update secondaries for disks, if needed
7424 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7425 # need to update the nodes and minors
7426 assert len(self.op.nodes) == 2
7427 assert len(disk.logical_id) == 6 # otherwise disk internals
7429 (_, _, old_port, _, _, old_secret) = disk.logical_id
7430 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7431 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7432 new_minors[0], new_minors[1], old_secret)
7433 assert len(disk.logical_id) == len(new_id)
7437 mods.append((idx, new_id, changes))
7439 # now that we have passed all asserts above, we can apply the mods
7440 # in a single run (to avoid partial changes)
7441 for idx, new_id, changes in mods:
7442 disk = instance.disks[idx]
7443 if new_id is not None:
7444 assert disk.dev_type == constants.LD_DRBD8
7445 disk.logical_id = new_id
7447 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7448 mode=changes.get(constants.IDISK_MODE, None))
7450 # change primary node, if needed
7452 instance.primary_node = self.op.nodes[0]
7453 self.LogWarning("Changing the instance's nodes, you will have to"
7454 " remove any disks left on the older nodes manually")
7457 self.cfg.Update(instance, feedback_fn)
7459 _CreateDisks(self, instance, to_skip=to_skip)
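# Illustrative example: self.op.disks accepts either a plain list of indices
# (deprecated; normalized in CheckArguments above) or a list of
# (index, params) pairs, e.g. [(0, {constants.IDISK_SIZE: 20480})] to
# recreate disk 0 with a 20 GiB size; only IDISK_SIZE and IDISK_MODE may be
# changed while recreating.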
7462 class LUInstanceRename(LogicalUnit):
7463 """Rename an instance.
7466 HPATH = "instance-rename"
7467 HTYPE = constants.HTYPE_INSTANCE
7469 def CheckArguments(self):
7473 if self.op.ip_check and not self.op.name_check:
7474 # TODO: make the ip check more flexible and not depend on the name check
7475 raise errors.OpPrereqError("IP address check requires a name check",
7478 def BuildHooksEnv(self):
7481 This runs on master, primary and secondary nodes of the instance.
7484 env = _BuildInstanceHookEnvByObject(self, self.instance)
7485 env["INSTANCE_NEW_NAME"] = self.op.new_name
7488 def BuildHooksNodes(self):
7489 """Build hooks nodes.
7492 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7495 def CheckPrereq(self):
7496 """Check prerequisites.
7498 This checks that the instance is in the cluster and is not running.
7501 self.op.instance_name = _ExpandInstanceName(self.cfg,
7502 self.op.instance_name)
7503 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7504 assert instance is not None
7505 _CheckNodeOnline(self, instance.primary_node)
7506 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7507 msg="cannot rename")
7508 self.instance = instance
7510 new_name = self.op.new_name
7511 if self.op.name_check:
7512 hostname = netutils.GetHostname(name=new_name)
7513 if hostname.name != new_name:
7514 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7516 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7517 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7518 " same as given hostname '%s'") %
7519 (hostname.name, self.op.new_name),
7521 new_name = self.op.new_name = hostname.name
7522 if (self.op.ip_check and
7523 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7524 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7525 (hostname.ip, new_name),
7526 errors.ECODE_NOTUNIQUE)
7528 instance_list = self.cfg.GetInstanceList()
7529 if new_name in instance_list and new_name != instance.name:
7530 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7531 new_name, errors.ECODE_EXISTS)
7533 def Exec(self, feedback_fn):
7534 """Rename the instance.
7537 inst = self.instance
7538 old_name = inst.name
7540 rename_file_storage = False
7541 if (inst.disk_template in constants.DTS_FILEBASED and
7542 self.op.new_name != inst.name):
7543 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7544 rename_file_storage = True
7546 self.cfg.RenameInstance(inst.name, self.op.new_name)
7547 # Change the instance lock. This is definitely safe while we hold the BGL.
7548 # Otherwise the new lock would have to be added in acquired mode.
7550 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7551 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7553 # re-read the instance from the configuration after rename
7554 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7556 if rename_file_storage:
7557 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7558 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7559 old_file_storage_dir,
7560 new_file_storage_dir)
7561 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7562 " (but the instance has been renamed in Ganeti)" %
7563 (inst.primary_node, old_file_storage_dir,
7564 new_file_storage_dir))
7566 _StartInstanceDisks(self, inst, None)
try:
7568 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7569 old_name, self.op.debug_level)
7570 msg = result.fail_msg
if msg:
7572 msg = ("Could not run OS rename script for instance %s on node %s"
7573 " (but the instance has been renamed in Ganeti): %s" %
7574 (inst.name, inst.primary_node, msg))
7575 self.proc.LogWarning(msg)
finally:
7577 _ShutdownInstanceDisks(self, inst)
return inst.name
7582 class LUInstanceRemove(LogicalUnit):
7583 """Remove an instance.
7586 HPATH = "instance-remove"
7587 HTYPE = constants.HTYPE_INSTANCE
7590 def ExpandNames(self):
7591 self._ExpandAndLockInstance()
7592 self.needed_locks[locking.LEVEL_NODE] = []
7593 self.needed_locks[locking.LEVEL_NODE_RES] = []
7594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7596 def DeclareLocks(self, level):
7597 if level == locking.LEVEL_NODE:
7598 self._LockInstancesNodes()
7599 elif level == locking.LEVEL_NODE_RES:
7601 self.needed_locks[locking.LEVEL_NODE_RES] = \
7602 self.needed_locks[locking.LEVEL_NODE][:]
7604 def BuildHooksEnv(self):
7607 This runs on master, primary and secondary nodes of the instance.
7610 env = _BuildInstanceHookEnvByObject(self, self.instance)
7611 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
return env
7614 def BuildHooksNodes(self):
7615 """Build hooks nodes.
7618 nl = [self.cfg.GetMasterNode()]
7619 nl_post = list(self.instance.all_nodes) + nl
7620 return (nl, nl_post)
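# Illustrative note (hypothetical node names, not part of the original code):
# for an instance with primary "node1" and secondary "node2", the tuple built
# above would be
#   (["master.example.com"], ["node1", "node2", "master.example.com"])
# i.e. the pre hooks run only on the master, while the post hooks also run on
# the nodes that hosted the instance.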
7622 def CheckPrereq(self):
7623 """Check prerequisites.
7625 This checks that the instance is in the cluster.
7628 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7629 assert self.instance is not None, \
7630 "Cannot retrieve locked instance %s" % self.op.instance_name
7632 def Exec(self, feedback_fn):
7633 """Remove the instance.
7636 instance = self.instance
7637 logging.info("Shutting down instance %s on node %s",
7638 instance.name, instance.primary_node)
7640 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7641 self.op.shutdown_timeout)
7642 msg = result.fail_msg
if msg:
7644 if self.op.ignore_failures:
7645 feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
7647 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
7649 (instance.name, instance.primary_node, msg))
7651 assert (self.owned_locks(locking.LEVEL_NODE) ==
7652 self.owned_locks(locking.LEVEL_NODE_RES))
7653 assert not (set(instance.all_nodes) -
7654 self.owned_locks(locking.LEVEL_NODE)), \
7655 "Not owning correct locks"
7657 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7660 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7661 """Utility function to remove an instance.
7664 logging.info("Removing block devices for instance %s", instance.name)
7666 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7667 if not ignore_failures:
7668 raise errors.OpExecError("Can't remove instance's disks")
7669 feedback_fn("Warning: can't remove instance's disks")
7671 logging.info("Removing instance %s out of cluster config", instance.name)
7673 lu.cfg.RemoveInstance(instance.name)
7675 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7676 "Instance lock removal conflict"
7678 # Remove lock for the instance
7679 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7682 class LUInstanceQuery(NoHooksLU):
7683 """Logical unit for querying instances.
7686 # pylint: disable=W0142
7689 def CheckArguments(self):
7690 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7691 self.op.output_fields, self.op.use_locking)
7693 def ExpandNames(self):
7694 self.iq.ExpandNames(self)
7696 def DeclareLocks(self, level):
7697 self.iq.DeclareLocks(self, level)
7699 def Exec(self, feedback_fn):
7700 return self.iq.OldStyleQuery(self)
7703 class LUInstanceFailover(LogicalUnit):
7704 """Failover an instance.
7707 HPATH = "instance-failover"
7708 HTYPE = constants.HTYPE_INSTANCE
7711 def CheckArguments(self):
7712 """Check the arguments.
7715 self.iallocator = getattr(self.op, "iallocator", None)
7716 self.target_node = getattr(self.op, "target_node", None)
7718 def ExpandNames(self):
7719 self._ExpandAndLockInstance()
7721 if self.op.target_node is not None:
7722 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7724 self.needed_locks[locking.LEVEL_NODE] = []
7725 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7727 self.needed_locks[locking.LEVEL_NODE_RES] = []
7728 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7730 ignore_consistency = self.op.ignore_consistency
7731 shutdown_timeout = self.op.shutdown_timeout
7732 self._migrater = TLMigrateInstance(self, self.op.instance_name,
cleanup=False,
failover=True,
7735 ignore_consistency=ignore_consistency,
7736 shutdown_timeout=shutdown_timeout,
7737 ignore_ipolicy=self.op.ignore_ipolicy)
7738 self.tasklets = [self._migrater]
7740 def DeclareLocks(self, level):
7741 if level == locking.LEVEL_NODE:
7742 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7743 if instance.disk_template in constants.DTS_EXT_MIRROR:
7744 if self.op.target_node is None:
7745 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
7747 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7748 self.op.target_node]
7749 del self.recalculate_locks[locking.LEVEL_NODE]
else:  # not an externally mirrored disk template
7751 self._LockInstancesNodes()
7752 elif level == locking.LEVEL_NODE_RES:
7754 self.needed_locks[locking.LEVEL_NODE_RES] = \
7755 self.needed_locks[locking.LEVEL_NODE][:]
7757 def BuildHooksEnv(self):
7760 This runs on master, primary and secondary nodes of the instance.
7763 instance = self._migrater.instance
7764 source_node = instance.primary_node
7765 target_node = self.op.target_node
env = {
7767 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7768 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7769 "OLD_PRIMARY": source_node,
7770 "NEW_PRIMARY": target_node,
}
7773 if instance.disk_template in constants.DTS_INT_MIRROR:
7774 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7775 env["NEW_SECONDARY"] = source_node
else:
7777 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7779 env.update(_BuildInstanceHookEnvByObject(self, instance))
return env
7783 def BuildHooksNodes(self):
7784 """Build hooks nodes.
7787 instance = self._migrater.instance
7788 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7789 return (nl, nl + [instance.primary_node])
7792 class LUInstanceMigrate(LogicalUnit):
7793 """Migrate an instance.
7795 This is migration without shutting down, compared to the failover,
7796 which is done with shutdown.
7799 HPATH = "instance-migrate"
7800 HTYPE = constants.HTYPE_INSTANCE
7803 def ExpandNames(self):
7804 self._ExpandAndLockInstance()
7806 if self.op.target_node is not None:
7807 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7809 self.needed_locks[locking.LEVEL_NODE] = []
7810 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7812 self.needed_locks[locking.LEVEL_NODE_RES] = []
7813 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
self._migrater = \
7816 TLMigrateInstance(self, self.op.instance_name,
7817 cleanup=self.op.cleanup,
failover=False,
7819 fallback=self.op.allow_failover,
7820 allow_runtime_changes=self.op.allow_runtime_changes,
7821 ignore_ipolicy=self.op.ignore_ipolicy)
7822 self.tasklets = [self._migrater]
7824 def DeclareLocks(self, level):
7825 if level == locking.LEVEL_NODE:
7826 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7827 if instance.disk_template in constants.DTS_EXT_MIRROR:
7828 if self.op.target_node is None:
7829 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
7831 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7832 self.op.target_node]
7833 del self.recalculate_locks[locking.LEVEL_NODE]
else:  # not an externally mirrored disk template
7835 self._LockInstancesNodes()
7836 elif level == locking.LEVEL_NODE_RES:
7838 self.needed_locks[locking.LEVEL_NODE_RES] = \
7839 self.needed_locks[locking.LEVEL_NODE][:]
7841 def BuildHooksEnv(self):
7844 This runs on master, primary and secondary nodes of the instance.
7847 instance = self._migrater.instance
7848 source_node = instance.primary_node
7849 target_node = self.op.target_node
7850 env = _BuildInstanceHookEnvByObject(self, instance)
env.update({
7852 "MIGRATE_LIVE": self._migrater.live,
7853 "MIGRATE_CLEANUP": self.op.cleanup,
7854 "OLD_PRIMARY": source_node,
7855 "NEW_PRIMARY": target_node,
7856 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
})
7859 if instance.disk_template in constants.DTS_INT_MIRROR:
7860 env["OLD_SECONDARY"] = target_node
7861 env["NEW_SECONDARY"] = source_node
else:
7863 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
return env
7867 def BuildHooksNodes(self):
7868 """Build hooks nodes.
7871 instance = self._migrater.instance
7872 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7873 return (nl, nl + [instance.primary_node])
7876 class LUInstanceMove(LogicalUnit):
7877 """Move an instance by data-copying.
7880 HPATH = "instance-move"
7881 HTYPE = constants.HTYPE_INSTANCE
7884 def ExpandNames(self):
7885 self._ExpandAndLockInstance()
7886 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7887 self.op.target_node = target_node
7888 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7889 self.needed_locks[locking.LEVEL_NODE_RES] = []
7890 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7892 def DeclareLocks(self, level):
7893 if level == locking.LEVEL_NODE:
7894 self._LockInstancesNodes(primary_only=True)
7895 elif level == locking.LEVEL_NODE_RES:
7897 self.needed_locks[locking.LEVEL_NODE_RES] = \
7898 self.needed_locks[locking.LEVEL_NODE][:]
7900 def BuildHooksEnv(self):
7903 This runs on master, primary and secondary nodes of the instance.
env = {
7907 "TARGET_NODE": self.op.target_node,
7908 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
}
7910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
7913 def BuildHooksNodes(self):
7914 """Build hooks nodes.
"""
nl = [
7918 self.cfg.GetMasterNode(),
7919 self.instance.primary_node,
7920 self.op.target_node,
]
return (nl, nl)
7924 def CheckPrereq(self):
7925 """Check prerequisites.
7927 This checks that the instance is in the cluster.
7930 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7931 assert self.instance is not None, \
7932 "Cannot retrieve locked instance %s" % self.op.instance_name
7934 node = self.cfg.GetNodeInfo(self.op.target_node)
7935 assert node is not None, \
7936 "Cannot retrieve locked node %s" % self.op.target_node
7938 self.target_node = target_node = node.name
7940 if target_node == instance.primary_node:
7941 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7942 (instance.name, target_node),
7945 bep = self.cfg.GetClusterInfo().FillBE(instance)
7947 for idx, dsk in enumerate(instance.disks):
7948 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7949 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7950 " cannot copy" % idx, errors.ECODE_STATE)
7952 _CheckNodeOnline(self, target_node)
7953 _CheckNodeNotDrained(self, target_node)
7954 _CheckNodeVmCapable(self, target_node)
7955 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7956 self.cfg.GetNodeGroup(node.group))
7957 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7958 ignore=self.op.ignore_ipolicy)
7960 if instance.admin_state == constants.ADMINST_UP:
7961 # check memory requirements on the secondary node
7962 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7963 instance.name, bep[constants.BE_MAXMEM],
7964 instance.hypervisor)
else:
7966 self.LogInfo("Not checking memory on the secondary node as"
7967 " instance will not be started")
7969 # check bridge existance
7970 _CheckInstanceBridgesExist(self, instance, node=target_node)
7972 def Exec(self, feedback_fn):
7973 """Move an instance.
7975 The move is done by shutting it down on its present node, copying
7976 the data over (slow) and starting it on the new node.
7979 instance = self.instance
7981 source_node = instance.primary_node
7982 target_node = self.target_node
7984 self.LogInfo("Shutting down instance %s on source node %s",
7985 instance.name, source_node)
7987 assert (self.owned_locks(locking.LEVEL_NODE) ==
7988 self.owned_locks(locking.LEVEL_NODE_RES))
7990 result = self.rpc.call_instance_shutdown(source_node, instance,
7991 self.op.shutdown_timeout)
7992 msg = result.fail_msg
if msg:
7994 if self.op.ignore_consistency:
7995 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7996 " Proceeding anyway. Please make sure node"
7997 " %s is down. Error details: %s",
7998 instance.name, source_node, source_node, msg)
else:
8000 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
8002 (instance.name, source_node, msg))
8004 # create the target disks
try:
8006 _CreateDisks(self, instance, target_node=target_node)
8007 except errors.OpExecError:
8008 self.LogWarning("Device creation failed, reverting...")
try:
8010 _RemoveDisks(self, instance, target_node=target_node)
finally:
8012 self.cfg.ReleaseDRBDMinors(instance.name)
raise
8015 cluster_name = self.cfg.GetClusterInfo().cluster_name
errs = []
8018 # activate, get path, copy the data over
8019 for idx, disk in enumerate(instance.disks):
8020 self.LogInfo("Copying data for disk %d", idx)
8021 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8022 instance.name, True, idx)
if result.fail_msg:
8024 self.LogWarning("Can't assemble newly created disk %d: %s",
8025 idx, result.fail_msg)
8026 errs.append(result.fail_msg)
break
8028 dev_path = result.payload
8029 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8030 target_node, dev_path,
cluster_name)
if result.fail_msg:
8033 self.LogWarning("Can't copy data over for disk %d: %s",
8034 idx, result.fail_msg)
8035 errs.append(result.fail_msg)
break
if errs:
8039 self.LogWarning("Some disks failed to copy, aborting")
try:
8041 _RemoveDisks(self, instance, target_node=target_node)
finally:
8043 self.cfg.ReleaseDRBDMinors(instance.name)
8044 raise errors.OpExecError("Errors during disk copy: %s" %
",".join(errs))
8047 instance.primary_node = target_node
8048 self.cfg.Update(instance, feedback_fn)
8050 self.LogInfo("Removing the disks on the original node")
8051 _RemoveDisks(self, instance, target_node=source_node)
8053 # Only start the instance if it's marked as up
8054 if instance.admin_state == constants.ADMINST_UP:
8055 self.LogInfo("Starting instance %s on node %s",
8056 instance.name, target_node)
8058 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8059 ignore_secondaries=True)
if not disks_ok:
8061 _ShutdownInstanceDisks(self, instance)
8062 raise errors.OpExecError("Can't activate the instance's disks")
8064 result = self.rpc.call_instance_start(target_node,
8065 (instance, None, None), False)
8066 msg = result.fail_msg
if msg:
8068 _ShutdownInstanceDisks(self, instance)
8069 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8070 (instance.name, target_node, msg))
8073 class LUNodeMigrate(LogicalUnit):
8074 """Migrate all instances from a node.
8077 HPATH = "node-migrate"
8078 HTYPE = constants.HTYPE_NODE
8081 def CheckArguments(self):
8084 def ExpandNames(self):
8085 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8087 self.share_locks = _ShareAll()
8088 self.needed_locks = {
8089 locking.LEVEL_NODE: [self.op.node_name],
}
8092 def BuildHooksEnv(self):
8095 This runs on the master, the primary and all the secondaries.
return {
8099 "NODE_NAME": self.op.node_name,
8100 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
}
8103 def BuildHooksNodes(self):
8104 """Build hooks nodes.
8107 nl = [self.cfg.GetMasterNode()]
return (nl, nl)
8110 def CheckPrereq(self):
8113 def Exec(self, feedback_fn):
8114 # Prepare jobs for migration instances
8115 allow_runtime_changes = self.op.allow_runtime_changes
jobs = [
8117 [opcodes.OpInstanceMigrate(instance_name=inst.name,
mode=self.op.mode,
live=self.op.live,
8120 iallocator=self.op.iallocator,
8121 target_node=self.op.target_node,
8122 allow_runtime_changes=allow_runtime_changes,
8123 ignore_ipolicy=self.op.ignore_ipolicy)]
8124 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8127 # TODO: Run iallocator in this opcode and pass correct placement options to
8128 # OpInstanceMigrate. Since other jobs can modify the cluster between
8129 # running the iallocator and the actual migration, a good consistency model
8130 # will have to be found.
8132 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8133 frozenset([self.op.node_name]))
8135 return ResultWithJobs(jobs)
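# Sketch of the returned structure (hypothetical instance names): with two
# primary instances on the node, the C{jobs} list built above would look like
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#     ]
#
# i.e. one single-opcode job per instance, so each migration is scheduled as
# its own job.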
8138 class TLMigrateInstance(Tasklet):
8139 """Tasklet class for instance migration.
8142 @ivar live: whether the migration will be done live or non-live;
8143 this variable is initalized only after CheckPrereq has run
8144 @type cleanup: boolean
8145 @ivar cleanup: Wheater we cleanup from a failed migration
8146 @type iallocator: string
8147 @ivar iallocator: The iallocator used to determine target_node
8148 @type target_node: string
8149 @ivar target_node: If given, the target_node to reallocate the instance to
8150 @type failover: boolean
8151 @ivar failover: Whether operation results in failover or migration
8152 @type fallback: boolean
8153 @ivar fallback: Whether fallback to failover is allowed if migration not
8155 @type ignore_consistency: boolean
8156 @ivar ignore_consistency: Wheter we should ignore consistency between source
8158 @type shutdown_timeout: int
8159 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8160 @type ignore_ipolicy: bool
8161 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8166 _MIGRATION_POLL_INTERVAL = 1 # seconds
8167 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8169 def __init__(self, lu, instance_name, cleanup=False,
8170 failover=False, fallback=False,
8171 ignore_consistency=False,
8172 allow_runtime_changes=True,
8173 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8174 ignore_ipolicy=False):
8175 """Initializes this class.
8178 Tasklet.__init__(self, lu)
8181 self.instance_name = instance_name
8182 self.cleanup = cleanup
8183 self.live = False # will be overridden later
8184 self.failover = failover
8185 self.fallback = fallback
8186 self.ignore_consistency = ignore_consistency
8187 self.shutdown_timeout = shutdown_timeout
8188 self.ignore_ipolicy = ignore_ipolicy
8189 self.allow_runtime_changes = allow_runtime_changes
8191 def CheckPrereq(self):
8192 """Check prerequisites.
8194 This checks that the instance is in the cluster.
8197 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8198 instance = self.cfg.GetInstanceInfo(instance_name)
8199 assert instance is not None
8200 self.instance = instance
8201 cluster = self.cfg.GetClusterInfo()
8203 if (not self.cleanup and
8204 not instance.admin_state == constants.ADMINST_UP and
8205 not self.failover and self.fallback):
8206 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8207 " switching to failover")
8208 self.failover = True
8210 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failovers"
else:
text = "migrations"
8215 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8216 " %s" % (instance.disk_template, text),
errors.ECODE_STATE)
8219 if instance.disk_template in constants.DTS_EXT_MIRROR:
8220 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8222 if self.lu.op.iallocator:
8223 self._RunAllocator()
else:
8225 # We set self.target_node as it is required by
# BuildHooksEnv
8227 self.target_node = self.lu.op.target_node
8229 # Check that the target node is correct in terms of instance policy
8230 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8231 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8232 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8233 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8234 ignore=self.ignore_ipolicy)
8236 # self.target_node is already populated, either directly or by the
8238 target_node = self.target_node
8239 if self.target_node == instance.primary_node:
8240 raise errors.OpPrereqError("Cannot migrate instance %s"
8241 " to its primary (%s)" %
8242 (instance.name, instance.primary_node))
8244 if len(self.lu.tasklets) == 1:
8245 # It is safe to release locks only when we're the only tasklet
8247 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8248 keep=[instance.primary_node, self.target_node])
else:  # internally mirrored disk template
8251 secondary_nodes = instance.secondary_nodes
8252 if not secondary_nodes:
8253 raise errors.ConfigurationError("No secondary node but using"
8254 " %s disk template" %
8255 instance.disk_template)
8256 target_node = secondary_nodes[0]
8257 if self.lu.op.iallocator or (self.lu.op.target_node and
8258 self.lu.op.target_node != target_node):
if self.failover:
8260 text = "failed over"
else:
text = "migrated"
8263 raise errors.OpPrereqError("Instances with disk template %s cannot"
8264 " be %s to arbitrary nodes"
8265 " (neither an iallocator nor a target"
8266 " node can be passed)" %
8267 (instance.disk_template, text),
errors.ECODE_INVAL)
8269 nodeinfo = self.cfg.GetNodeInfo(target_node)
8270 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8271 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8272 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8273 ignore=self.ignore_ipolicy)
8275 i_be = cluster.FillBE(instance)
8277 # check memory requirements on the secondary node
8278 if (not self.cleanup and
8279 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8280 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8281 "migrating instance %s" %
instance.name,
8283 i_be[constants.BE_MINMEM],
8284 instance.hypervisor)
else:
8286 self.lu.LogInfo("Not checking memory on the secondary node as"
8287 " instance will not be started")
8289 # check if failover must be forced instead of migration
8290 if (not self.cleanup and not self.failover and
8291 i_be[constants.BE_ALWAYS_FAILOVER]):
if self.fallback:
8293 self.lu.LogInfo("Instance configured to always failover; fallback"
" to failover")
8295 self.failover = True
else:
8297 raise errors.OpPrereqError("This instance has been configured to"
8298 " always failover, please allow failover",
errors.ECODE_STATE)
8301 # check bridge existance
8302 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8304 if not self.cleanup:
8305 _CheckNodeNotDrained(self.lu, target_node)
8306 if not self.failover:
8307 result = self.rpc.call_instance_migratable(instance.primary_node,
instance)
8309 if result.fail_msg and self.fallback:
8310 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
" failover")
8312 self.failover = True
else:
8314 result.Raise("Can't migrate, please use failover",
8315 prereq=True, ecode=errors.ECODE_STATE)
8317 assert not (self.failover and self.cleanup)
8319 if not self.failover:
8320 if self.lu.op.live is not None and self.lu.op.mode is not None:
8321 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8322 " parameters are accepted",
errors.ECODE_INVAL)
8324 if self.lu.op.live is not None:
if self.lu.op.live:
8326 self.lu.op.mode = constants.HT_MIGRATION_LIVE
else:
8328 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8329 # reset the 'live' parameter to None so that repeated
8330 # invocations of CheckPrereq do not raise an exception
8331 self.lu.op.live = None
8332 elif self.lu.op.mode is None:
8333 # read the default value from the hypervisor
8334 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8335 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8337 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
else:
8339 # Failover is never live
self.live = False
8342 if not (self.failover or self.cleanup):
8343 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
8345 instance.hypervisor)
8346 remote_info.Raise("Error checking instance on node %s" %
8347 instance.primary_node)
8348 instance_running = bool(remote_info.payload)
8349 if instance_running:
8350 self.current_mem = int(remote_info.payload["memory"])
8352 def _RunAllocator(self):
8353 """Run the allocator based on input opcode.
8356 # FIXME: add a self.ignore_ipolicy option
8357 ial = IAllocator(self.cfg, self.rpc,
8358 mode=constants.IALLOCATOR_MODE_RELOC,
8359 name=self.instance_name,
8360 relocate_from=[self.instance.primary_node],
)
8363 ial.Run(self.lu.op.iallocator)
if not ial.success:
8366 raise errors.OpPrereqError("Can't compute nodes using"
8367 " iallocator '%s': %s" %
8368 (self.lu.op.iallocator, ial.info),
errors.ECODE_NORES)
8370 if len(ial.result) != ial.required_nodes:
8371 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8372 " of nodes (%s), required %s" %
8373 (self.lu.op.iallocator, len(ial.result),
8374 ial.required_nodes), errors.ECODE_FAULT)
8375 self.target_node = ial.result[0]
8376 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8377 self.instance_name, self.lu.op.iallocator,
8378 utils.CommaJoin(ial.result))
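# Example (hypothetical values): for a relocation request the iallocator must
# return exactly ial.required_nodes names; for an instance migration that is
# a single node, e.g. ial.result == ["node3.example.com"], which is why only
# ial.result[0] is used as the target node above.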
8380 def _WaitUntilSync(self):
8381 """Poll with custom rpc for disk sync.
8383 This uses our own step-based rpc call.
8386 self.feedback_fn("* wait until resync is done")
all_done = False
while not all_done:
all_done = True
8390 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
self.nodes_ip,
8392 (self.instance.disks,
self.instance))
min_percent = 100
8395 for node, nres in result.items():
8396 nres.Raise("Cannot resync disks on node %s" % node)
8397 node_done, node_percent = nres.payload
8398 all_done = all_done and node_done
8399 if node_percent is not None:
8400 min_percent = min(min_percent, node_percent)
if not all_done:
8402 if min_percent < 100:
8403 self.feedback_fn(" - progress: %.1f%%" % min_percent)
time.sleep(2)
8406 def _EnsureSecondary(self, node):
8407 """Demote a node to secondary.
8410 self.feedback_fn("* switching node %s to secondary mode" % node)
8412 for dev in self.instance.disks:
8413 self.cfg.SetDiskID(dev, node)
8415 result = self.rpc.call_blockdev_close(node, self.instance.name,
8416 self.instance.disks)
8417 result.Raise("Cannot change disk to secondary on node %s" % node)
8419 def _GoStandalone(self):
8420 """Disconnect from the network.
8423 self.feedback_fn("* changing into standalone mode")
8424 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8425 self.instance.disks)
8426 for node, nres in result.items():
8427 nres.Raise("Cannot disconnect disks node %s" % node)
8429 def _GoReconnect(self, multimaster):
8430 """Reconnect to the network.
"""
if multimaster:
msg = "dual-master"
else:
8436 msg = "single-master"
8437 self.feedback_fn("* changing disks into %s mode" % msg)
8438 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8439 (self.instance.disks, self.instance),
8440 self.instance.name, multimaster)
8441 for node, nres in result.items():
8442 nres.Raise("Cannot change disks config on node %s" % node)
8444 def _ExecCleanup(self):
8445 """Try to cleanup after a failed migration.
8447 The cleanup is done by:
8448 - check that the instance is running only on one node
8449 (and update the config if needed)
8450 - change disks on its secondary node to secondary
8451 - wait until disks are fully synchronized
8452 - disconnect from the network
8453 - change disks into single-master mode
8454 - wait again until disks are fully synchronized
8457 instance = self.instance
8458 target_node = self.target_node
8459 source_node = self.source_node
8461 # check running on only one node
8462 self.feedback_fn("* checking where the instance actually runs"
8463 " (if this hangs, the hypervisor might be in"
8465 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8466 for node, result in ins_l.items():
8467 result.Raise("Can't contact node %s" % node)
8469 runningon_source = instance.name in ins_l[source_node].payload
8470 runningon_target = instance.name in ins_l[target_node].payload
8472 if runningon_source and runningon_target:
8473 raise errors.OpExecError("Instance seems to be running on two nodes,"
8474 " or the hypervisor is confused; you will have"
8475 " to ensure manually that it runs only on one"
8476 " and restart this operation")
8478 if not (runningon_source or runningon_target):
8479 raise errors.OpExecError("Instance does not seem to be running at all;"
8480 " in this case it's safer to repair by"
8481 " running 'gnt-instance stop' to ensure disk"
8482 " shutdown, and then restarting it")
8484 if runningon_target:
8485 # the migration has actually succeeded, we need to update the config
8486 self.feedback_fn("* instance running on secondary node (%s),"
8487 " updating config" % target_node)
8488 instance.primary_node = target_node
8489 self.cfg.Update(instance, self.feedback_fn)
8490 demoted_node = source_node
8492 self.feedback_fn("* instance confirmed to be running on its"
8493 " primary node (%s)" % source_node)
8494 demoted_node = target_node
8496 if instance.disk_template in constants.DTS_INT_MIRROR:
8497 self._EnsureSecondary(demoted_node)
try:
8499 self._WaitUntilSync()
8500 except errors.OpExecError:
8501 # we ignore here errors, since if the device is standalone, it
8502 # won't be able to sync
pass
8504 self._GoStandalone()
8505 self._GoReconnect(False)
8506 self._WaitUntilSync()
8508 self.feedback_fn("* done")
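# Sketch of the DRBD transitions driven by the cleanup above (drbd8 layout
# assumed):
#   _EnsureSecondary(demoted_node)  -> close the device on the demoted node
#   _GoStandalone()                 -> disconnect the disks from the network
#   _GoReconnect(False)             -> reconnect in single-master mode
#   _WaitUntilSync()                -> wait for the resulting resync
# _ExecMigration below reuses the same helpers, but reconnects with
# multimaster=True while the memory transfer is running.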
8510 def _RevertDiskStatus(self):
8511 """Try to revert the disk status after a failed migration.
8514 target_node = self.target_node
8515 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
return
try:
8519 self._EnsureSecondary(target_node)
8520 self._GoStandalone()
8521 self._GoReconnect(False)
8522 self._WaitUntilSync()
8523 except errors.OpExecError, err:
8524 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8525 " please try to recover the instance manually;"
8526 " error '%s'" % str(err))
8528 def _AbortMigration(self):
8529 """Call the hypervisor code to abort a started migration.
8532 instance = self.instance
8533 target_node = self.target_node
8534 source_node = self.source_node
8535 migration_info = self.migration_info
8537 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
instance,
migration_info,
False)
8541 abort_msg = abort_result.fail_msg
if abort_msg:
8543 logging.error("Aborting migration failed on target node %s: %s",
8544 target_node, abort_msg)
8545 # Don't raise an exception here, as we stil have to try to revert the
8546 # disk status, even if this step failed.
8548 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8549 instance, False, self.live)
8550 abort_msg = abort_result.fail_msg
if abort_msg:
8552 logging.error("Aborting migration failed on source node %s: %s",
8553 source_node, abort_msg)
8555 def _ExecMigration(self):
8556 """Migrate an instance.
8558 The migrate is done by:
8559 - change the disks into dual-master mode
8560 - wait until disks are fully synchronized again
8561 - migrate the instance
8562 - change disks on the new secondary node (the old primary) to secondary
8563 - wait until disks are fully synchronized
8564 - change disks into single-master mode
8567 instance = self.instance
8568 target_node = self.target_node
8569 source_node = self.source_node
8571 # Check for hypervisor version mismatch and warn the user.
8572 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8573 None, [self.instance.hypervisor])
8574 for ninfo in nodeinfo.values():
8575 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8577 (_, _, (src_info, )) = nodeinfo[source_node].payload
8578 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8580 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8581 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8582 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8583 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8584 if src_version != dst_version:
8585 self.feedback_fn("* warning: hypervisor version mismatch between"
8586 " source (%s) and target (%s) node" %
8587 (src_version, dst_version))
8589 self.feedback_fn("* checking disk consistency between source and target")
8590 for (idx, dev) in enumerate(instance.disks):
8591 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8592 raise errors.OpExecError("Disk %s is degraded or not fully"
8593 " synchronized on target node,"
8594 " aborting migration" % idx)
8596 if self.current_mem > self.tgt_free_mem:
8597 if not self.allow_runtime_changes:
8598 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8599 " free memory to fit instance %s on target"
8600 " node %s (have %dMB, need %dMB)" %
8601 (instance.name, target_node,
8602 self.tgt_free_mem, self.current_mem))
8603 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8604 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
instance,
self.tgt_free_mem)
8607 rpcres.Raise("Cannot modify instance runtime memory")
8609 # First get the migration information from the remote node
8610 result = self.rpc.call_migration_info(source_node, instance)
8611 msg = result.fail_msg
if msg:
8613 log_err = ("Failed fetching source migration information from %s: %s" %
(source_node, msg))
8615 logging.error(log_err)
8616 raise errors.OpExecError(log_err)
8618 self.migration_info = migration_info = result.payload
8620 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8621 # Then switch the disks to master/master mode
8622 self._EnsureSecondary(target_node)
8623 self._GoStandalone()
8624 self._GoReconnect(True)
8625 self._WaitUntilSync()
8627 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8628 result = self.rpc.call_accept_instance(target_node,
instance,
migration_info,
8631 self.nodes_ip[target_node])
8633 msg = result.fail_msg
if msg:
8635 logging.error("Instance pre-migration failed, trying to revert"
8636 " disk status: %s", msg)
8637 self.feedback_fn("Pre-migration failed, aborting")
8638 self._AbortMigration()
8639 self._RevertDiskStatus()
8640 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8641 (instance.name, msg))
8643 self.feedback_fn("* migrating instance to %s" % target_node)
8644 result = self.rpc.call_instance_migrate(source_node, instance,
8645 self.nodes_ip[target_node],
self.live)
8647 msg = result.fail_msg
if msg:
8649 logging.error("Instance migration failed, trying to revert"
8650 " disk status: %s", msg)
8651 self.feedback_fn("Migration failed, aborting")
8652 self._AbortMigration()
8653 self._RevertDiskStatus()
8654 raise errors.OpExecError("Could not migrate instance %s: %s" %
8655 (instance.name, msg))
8657 self.feedback_fn("* starting memory transfer")
8658 last_feedback = time.time()
while True:
8660 result = self.rpc.call_instance_get_migration_status(source_node,
instance)
8662 msg = result.fail_msg
8663 ms = result.payload # MigrationStatus instance
8664 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8665 logging.error("Instance migration failed, trying to revert"
8666 " disk status: %s", msg)
8667 self.feedback_fn("Migration failed, aborting")
8668 self._AbortMigration()
8669 self._RevertDiskStatus()
8670 raise errors.OpExecError("Could not migrate instance %s: %s" %
8671 (instance.name, msg))
8673 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8674 self.feedback_fn("* memory transfer complete")
break
8677 if (utils.TimeoutExpired(last_feedback,
8678 self._MIGRATION_FEEDBACK_INTERVAL) and
8679 ms.transferred_ram is not None):
8680 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8681 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8682 last_feedback = time.time()
8684 time.sleep(self._MIGRATION_POLL_INTERVAL)
8686 result = self.rpc.call_instance_finalize_migration_src(source_node,
instance,
True,
self.live)
8690 msg = result.fail_msg
if msg:
8692 logging.error("Instance migration succeeded, but finalization failed"
8693 " on the source node: %s", msg)
8694 raise errors.OpExecError("Could not finalize instance migration: %s" %
msg)
8697 instance.primary_node = target_node
8699 # distribute new instance config to the other nodes
8700 self.cfg.Update(instance, self.feedback_fn)
8702 result = self.rpc.call_instance_finalize_migration_dst(target_node,
instance,
migration_info,
True)
8706 msg = result.fail_msg
if msg:
8708 logging.error("Instance migration succeeded, but finalization failed"
8709 " on the target node: %s", msg)
8710 raise errors.OpExecError("Could not finalize instance migration: %s" %
msg)
8713 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8714 self._EnsureSecondary(source_node)
8715 self._WaitUntilSync()
8716 self._GoStandalone()
8717 self._GoReconnect(False)
8718 self._WaitUntilSync()
8720 # If the instance's disk template is `rbd' or `ext' and there was a
8721 # successful migration, unmap the device from the source node.
8722 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8723 disks = _ExpandCheckDisks(instance, instance.disks)
8724 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
for disk in disks:
8726 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8727 msg = result.fail_msg
if msg:
8729 logging.error("Migration was successful, but couldn't unmap the"
8730 " block device %s on source node %s: %s",
8731 disk.iv_name, source_node, msg)
8732 logging.error("You need to unmap the device %s manually on %s",
8733 disk.iv_name, source_node)
8735 self.feedback_fn("* done")
8737 def _ExecFailover(self):
8738 """Failover an instance.
8740 The failover is done by shutting it down on its present node and
8741 starting it on the secondary.
8744 instance = self.instance
8745 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8747 source_node = instance.primary_node
8748 target_node = self.target_node
8750 if instance.admin_state == constants.ADMINST_UP:
8751 self.feedback_fn("* checking disk consistency between source and target")
8752 for (idx, dev) in enumerate(instance.disks):
8753 # for drbd, these are drbd over lvm
8754 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
False):
8756 if primary_node.offline:
8757 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
" target node %s" %
8759 (primary_node.name, idx, target_node))
8760 elif not self.ignore_consistency:
8761 raise errors.OpExecError("Disk %s is degraded on target node,"
8762 " aborting failover" % idx)
8764 self.feedback_fn("* not checking disk consistency as instance is not"
" running")
8767 self.feedback_fn("* shutting down instance on source node")
8768 logging.info("Shutting down instance %s on node %s",
8769 instance.name, source_node)
8771 result = self.rpc.call_instance_shutdown(source_node, instance,
8772 self.shutdown_timeout)
8773 msg = result.fail_msg
if msg:
8775 if self.ignore_consistency or primary_node.offline:
8776 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8777 " proceeding anyway; please make sure node"
8778 " %s is down; error details: %s",
8779 instance.name, source_node, source_node, msg)
else:
8781 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
8783 (instance.name, source_node, msg))
8785 self.feedback_fn("* deactivating the instance's disks on source node")
8786 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8787 raise errors.OpExecError("Can't shut down the instance's disks")
8789 instance.primary_node = target_node
8790 # distribute new instance config to the other nodes
8791 self.cfg.Update(instance, self.feedback_fn)
8793 # Only start the instance if it's marked as up
8794 if instance.admin_state == constants.ADMINST_UP:
8795 self.feedback_fn("* activating the instance's disks on target node %s" %
target_node)
8797 logging.info("Starting instance %s on node %s",
8798 instance.name, target_node)
8800 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8801 ignore_secondaries=True)
if not disks_ok:
8803 _ShutdownInstanceDisks(self.lu, instance)
8804 raise errors.OpExecError("Can't activate the instance's disks")
8806 self.feedback_fn("* starting the instance on the target node %s" %
target_node)
8808 result = self.rpc.call_instance_start(target_node, (instance, None, None),
False)
8810 msg = result.fail_msg
if msg:
8812 _ShutdownInstanceDisks(self.lu, instance)
8813 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8814 (instance.name, target_node, msg))
8816 def Exec(self, feedback_fn):
8817 """Perform the migration.
8820 self.feedback_fn = feedback_fn
8821 self.source_node = self.instance.primary_node
8823 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8824 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8825 self.target_node = self.instance.secondary_nodes[0]
8826 # Otherwise self.target_node has been populated either
8827 # directly, or through an iallocator.
8829 self.all_nodes = [self.source_node, self.target_node]
8830 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8831 in self.cfg.GetMultiNodeInfo(self.all_nodes))
if self.failover:
8834 feedback_fn("Failover instance %s" % self.instance.name)
8835 self._ExecFailover()
else:
8837 feedback_fn("Migrating instance %s" % self.instance.name)
if self.cleanup:
8840 return self._ExecCleanup()
else:
8842 return self._ExecMigration()
8845 def _CreateBlockDev(lu, node, instance, device, force_create, info,
force_open):
8847 """Wrapper around L{_CreateBlockDevInner}.
8849 This method annotates the root device first.
8852 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8853 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
force_open)
8857 def _CreateBlockDevInner(lu, node, instance, device, force_create,
info, force_open):
8859 """Create a tree of block devices on a given node.
8861 If this device type has to be created on secondaries, create it and
8864 If not, just recurse to children keeping the same 'force' value.
8866 @attention: The device has to be annotated already.
8868 @param lu: the lu on whose behalf we execute
8869 @param node: the node on which to create the device
8870 @type instance: L{objects.Instance}
8871 @param instance: the instance which owns the device
8872 @type device: L{objects.Disk}
8873 @param device: the device to create
8874 @type force_create: boolean
8875 @param force_create: whether to force creation of this device; this
8876 will be change to True whenever we find a device which has
8877 CreateOnSecondary() attribute
8878 @param info: the extra 'metadata' we should attach to the device
8879 (this will be represented as a LVM tag)
8880 @type force_open: boolean
8881 @param force_open: this parameter will be passes to the
8882 L{backend.BlockdevCreate} function where it specifies
8883 whether we run on primary or not, and it affects both
8884 the child assembly and the device own Open() execution
8887 if device.CreateOnSecondary():
force_create = True
if device.children:
8891 for child in device.children:
8892 _CreateBlockDevInner(lu, node, instance, child, force_create,
info, force_open)
8895 if not force_create:
return
8898 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8901 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8902 """Create a single block device on a given node.
8904 This will not recurse over children of the device, so they must be
8907 @param lu: the lu on whose behalf we execute
8908 @param node: the node on which to create the device
8909 @type instance: L{objects.Instance}
8910 @param instance: the instance which owns the device
8911 @type device: L{objects.Disk}
8912 @param device: the device to create
8913 @param info: the extra 'metadata' we should attach to the device
8914 (this will be represented as a LVM tag)
8915 @type force_open: boolean
8916 @param force_open: this parameter will be passes to the
8917 L{backend.BlockdevCreate} function where it specifies
8918 whether we run on primary or not, and it affects both
8919 the child assembly and the device own Open() execution
8922 lu.cfg.SetDiskID(device, node)
8923 result = lu.rpc.call_blockdev_create(node, device, device.size,
8924 instance.name, force_open, info)
8925 result.Raise("Can't create block device %s on"
8926 " node %s for instance %s" % (device, node, instance.name))
8927 if device.physical_id is None:
8928 device.physical_id = result.payload
8931 def _GenerateUniqueNames(lu, exts):
8932 """Generate a suitable LV name.
8934 This will generate a logical volume name for the given instance.
"""
results = []
for val in exts:
8939 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8940 results.append("%s%s" % (new_id, val))
return results
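# Example (hypothetical generated id): _GenerateUniqueNames(lu,
# [".disk0_data", ".disk0_meta"]) returns one name per suffix, e.g.
#   ["4ba43e6a-....disk0_data", "4ba43e6a-....disk0_meta"]
# which become the LV names of a DRBD disk's data and metadata volumes.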
8943 def _GetPCIInfo(lu, dev_type):
8946 if hasattr(lu, 'hotplug_info'):
8947 info = lu.hotplug_info
8948 elif hasattr(lu, 'instance') and hasattr(lu.instance, 'hotplug_info'):
8949 return lu.cfg.GetPCIInfo(lu.instance.name, dev_type)
else:
info = None
if info:
8952 idx = getattr(info, dev_type)
8953 setattr(info, dev_type, idx+1)
8954 pci = info.pci_pool.pop()
return idx, pci
return None, None
8960 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8961 iv_name, p_minor, s_minor):
8962 """Generate a drbd8 device complete with its children.
8965 assert len(vgnames) == len(names) == 2
8966 port = lu.cfg.AllocatePort()
8967 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8969 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8970 logical_id=(vgnames[0], names[0]),
params={})
8972 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8973 logical_id=(vgnames[1], names[1]),
params={})
8976 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8977 drbd_dev = objects.Disk(idx=disk_idx, pci=pci,
8978 dev_type=constants.LD_DRBD8, size=size,
8979 logical_id=(primary, secondary, port,
p_minor, s_minor,
shared_secret),
8982 children=[dev_data, dev_meta],
8983 iv_name=iv_name, params={})
return drbd_dev
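# Layout note: the drbd8 device built above stores its addressing data in the
# 6-tuple
#   logical_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
# which is the same layout LUInstanceRecreateDisks relies on when it rebuilds
# the tuple with new nodes and freshly allocated minors.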
8987 _DISK_TEMPLATE_NAME_PREFIX = {
8988 constants.DT_PLAIN: "",
8989 constants.DT_RBD: ".rbd",
8990 constants.DT_EXT: ".ext",
}
8994 _DISK_TEMPLATE_DEVICE_TYPE = {
8995 constants.DT_PLAIN: constants.LD_LV,
8996 constants.DT_FILE: constants.LD_FILE,
8997 constants.DT_SHARED_FILE: constants.LD_FILE,
8998 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8999 constants.DT_RBD: constants.LD_RBD,
9000 constants.DT_EXT: constants.LD_EXT,
}
9004 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
9005 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
9006 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9007 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9008 """Generate the entire disk layout for a given template type.
9011 #TODO: compute space requirements
9013 vgname = lu.cfg.GetVGName()
9014 disk_count = len(disk_info)
disks = []
9017 if template_name == constants.DT_DISKLESS:
pass
9019 elif template_name == constants.DT_DRBD8:
9020 if len(secondary_nodes) != 1:
9021 raise errors.ProgrammerError("Wrong template configuration")
9022 remote_node = secondary_nodes[0]
9023 minors = lu.cfg.AllocateDRBDMinor(
9024 [primary_node, remote_node] * len(disk_info), instance_name)
9026 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
full_disk_params)
9028 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
names = []
9031 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9032 for i in range(disk_count)]):
9033 names.append(lv_prefix + "_data")
9034 names.append(lv_prefix + "_meta")
9035 for idx, disk in enumerate(disk_info):
9036 disk_index = idx + base_index
9037 data_vg = disk.get(constants.IDISK_VG, vgname)
9038 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9039 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9040 disk[constants.IDISK_SIZE],
9042 names[idx * 2:idx * 2 + 2],
9043 "disk/%d" % disk_index,
9044 minors[idx * 2], minors[idx * 2 + 1])
9045 disk_dev.mode = disk[constants.IDISK_MODE]
9046 disks.append(disk_dev)
else:
if secondary_nodes:
9049 raise errors.ProgrammerError("Wrong template configuration")
9051 if template_name == constants.DT_FILE:
_req_file_storage()
9053 elif template_name == constants.DT_SHARED_FILE:
9054 _req_shr_file_storage()
9056 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9057 if name_prefix is None:
names = None
else:
9060 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9061 (name_prefix, base_index + i)
9062 for i in range(disk_count)])
9064 if template_name == constants.DT_PLAIN:
9065 def logical_id_fn(idx, _, disk):
9066 vg = disk.get(constants.IDISK_VG, vgname)
9067 return (vg, names[idx])
9068 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9070 lambda _, disk_index, disk: (file_driver,
9071 "%s/disk%d" % (file_storage_dir,
9073 elif template_name == constants.DT_BLOCK:
9075 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9076 disk[constants.IDISK_ADOPT])
9077 elif template_name == constants.DT_RBD:
9078 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9079 elif template_name == constants.DT_EXT:
9080 def logical_id_fn(idx, _, disk):
9081 provider = disk.get(constants.IDISK_PROVIDER, None)
9082 if provider is None:
9083 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9084 " not found", constants.DT_EXT,
9085 constants.IDISK_PROVIDER)
9086 return (provider, names[idx])
9088 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9090 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9092 for idx, disk in enumerate(disk_info):
params = {}
9094 # Only for the Ext template add disk_info to params
9095 if template_name == constants.DT_EXT:
9096 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
for key in disk:
9098 if key not in constants.IDISK_PARAMS:
9099 params[key] = disk[key]
9100 disk_index = idx + base_index
9101 size = disk[constants.IDISK_SIZE]
9102 feedback_fn("* disk %s, size %s" %
9103 (disk_index, utils.FormatUnit(size, "h")))
9105 disk_idx, pci = _GetPCIInfo(lu, 'disks')
9107 disks.append(objects.Disk(dev_type=dev_type, size=size,
9108 logical_id=logical_id_fn(idx, disk_index, disk),
9109 iv_name="disk/%d" % disk_index,
9110 mode=disk[constants.IDISK_MODE],
9111 params=params, idx=disk_idx, pci=pci))
return disks
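# Usage sketch (hypothetical arguments): for a plain LVM instance with a
# single 10 GiB disk, this helper would be invoked roughly as
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                                 "node1", [], [{constants.IDISK_SIZE: 10240,
#                                                constants.IDISK_MODE: "rw"}],
#                                 None, None, 0, feedback_fn, {})
#
# and would return a single LD_LV objects.Disk whose logical_id is
# (volume_group, generated_lv_name).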
9116 def _GetInstanceInfoText(instance):
9117 """Compute the text that should be added to the disk's metadata.
9120 return "originstname+%s" % instance.name
9123 def _CalcEta(time_taken, written, total_size):
9124 """Calculates the ETA based on size written and total size.
9126 @param time_taken: The time taken so far
9127 @param written: amount written so far
9128 @param total_size: The total size of data to be written
9129 @return: The remaining time in seconds
9132 avg_time = time_taken / float(written)
9133 return (total_size - written) * avg_time
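# Worked example: if 512 MiB out of a 4096 MiB disk were written in 60
# seconds, _CalcEta(60, 512, 4096) == (4096 - 512) * (60 / 512.0) == 420.0,
# i.e. roughly seven more minutes remain.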
9136 def _WipeDisks(lu, instance):
9137 """Wipes instance disks.
9139 @type lu: L{LogicalUnit}
9140 @param lu: the logical unit on whose behalf we execute
9141 @type instance: L{objects.Instance}
9142 @param instance: the instance whose disks we should create
9143 @return: the success of the wipe
9146 node = instance.primary_node
9148 for device in instance.disks:
9149 lu.cfg.SetDiskID(device, node)
9151 logging.info("Pause sync of instance %s disks", instance.name)
9152 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9153 (instance.disks, instance),
True)
9155 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9157 for idx, success in enumerate(result.payload):
if not success:
9159 logging.warn("pause-sync of instance %s for disks %d failed",
instance.name, idx)
9163 for idx, device in enumerate(instance.disks):
9164 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9165 # MAX_WIPE_CHUNK at max
9166 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9167 constants.MIN_WIPE_CHUNK_PERCENT)
9168 # we _must_ make this an int, otherwise rounding errors will
9170 wipe_chunk_size = int(wipe_chunk_size)
9172 lu.LogInfo("* Wiping disk %d", idx)
9173 logging.info("Wiping disk %d for instance %s, node %s using"
9174 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
offset = 0
size = device.size
last_output = 0
9179 start_time = time.time()
9181 while offset < size:
9182 wipe_size = min(wipe_chunk_size, size - offset)
9183 logging.debug("Wiping disk %d, offset %s, chunk %s",
9184 idx, offset, wipe_size)
9185 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
wipe_size)
9187 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9188 (idx, offset, wipe_size))
offset += wipe_size
now = time.time()
9191 if now - last_output >= 60:
9192 eta = _CalcEta(now - start_time, offset, size)
9193 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9194 (offset / float(size) * 100, utils.FormatSeconds(eta)))
last_output = now
9197 logging.info("Resume sync of instance %s disks", instance.name)
9199 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9200 (instance.disks, instance),
False)
if result.fail_msg:
9204 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9205 " please have a look at the status and troubleshoot"
9206 " the issue: %s", node, result.fail_msg)
else:
9208 for idx, success in enumerate(result.payload):
if not success:
9210 lu.LogWarning("Resume sync of disk %d failed, please have a"
9211 " look at the status and troubleshoot the issue", idx)
9212 logging.warn("resume-sync of instance %s for disks %d failed",
instance.name, idx)
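# Worked example for the chunk sizing above (assuming the usual constant
# values MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB): a
# 2048 MiB disk is wiped in chunks of int(min(1024, 2048 / 100.0 * 10)) ==
# 204 MiB, while a very large disk is capped at the 1024 MiB maximum.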
9216 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9217 """Create all disks for an instance.
9219 This abstracts away some work from AddInstance.
9221 @type lu: L{LogicalUnit}
9222 @param lu: the logical unit on whose behalf we execute
9223 @type instance: L{objects.Instance}
9224 @param instance: the instance whose disks we should create
9226 @param to_skip: list of indices to skip
9227 @type target_node: string
9228 @param target_node: if passed, overrides the target node for creation
9230 @return: the success of the creation
9233 info = _GetInstanceInfoText(instance)
9234 if target_node is None:
9235 pnode = instance.primary_node
9236 all_nodes = instance.all_nodes
else:
pnode = target_node
all_nodes = [pnode]
9241 if instance.disk_template in constants.DTS_FILEBASED:
9242 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9243 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9245 result.Raise("Failed to create directory '%s' on"
9246 " node %s" % (file_storage_dir, pnode))
9248 # Note: this needs to be kept in sync with adding of disks in
9249 # LUInstanceSetParams
9250 for idx, device in enumerate(instance.disks):
9251 if to_skip and idx in to_skip:
continue
9253 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9255 for node in all_nodes:
9256 f_create = node == pnode
9257 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9260 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9261 """Remove all disks for an instance.
9263 This abstracts away some work from `AddInstance()` and
9264 `RemoveInstance()`. Note that in case some of the devices couldn't
9265 be removed, the removal will continue with the other ones (compare
9266 with `_CreateDisks()`).
9268 @type lu: L{LogicalUnit}
9269 @param lu: the logical unit on whose behalf we execute
9270 @type instance: L{objects.Instance}
9271 @param instance: the instance whose disks we should remove
9272 @type target_node: string
9273 @param target_node: used to override the node on which to remove the disks
9275 @return: the success of the removal
9278 logging.info("Removing block devices for instance %s", instance.name)
all_result = True
9281 ports_to_release = set()
9282 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9283 for (idx, device) in enumerate(anno_disks):
if target_node:
9285 edata = [(target_node, device)]
else:
9287 edata = device.ComputeNodeTree(instance.primary_node)
9288 for node, disk in edata:
9289 lu.cfg.SetDiskID(disk, node)
9290 result = lu.rpc.call_blockdev_remove(node, disk)
if result.fail_msg:
9292 lu.LogWarning("Could not remove disk %s on node %s,"
9293 " continuing anyway: %s", idx, node, result.fail_msg)
9294 if not (result.offline and node != instance.primary_node):
all_result = False
9297 # if this is a DRBD disk, return its port to the pool
9298 if device.dev_type in constants.LDS_DRBD:
9299 ports_to_release.add(device.logical_id[2])
9301 if all_result or ignore_failures:
9302 for port in ports_to_release:
9303 lu.cfg.AddTcpUdpPort(port)
9305 if instance.disk_template == constants.DT_FILE:
9306 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
if target_node:
tgt = target_node
else:
9310 tgt = instance.primary_node
9311 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
if result.fail_msg:
9313 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9314 file_storage_dir, instance.primary_node, result.fail_msg)
return all_result
9320 def _ComputeDiskSizePerVG(disk_template, disks):
9321 """Compute disk size requirements in the volume group
9324 def _compute(disks, payload):
9325 """Universal algorithm.
"""
vgs = {}
for disk in disks:
9330 vgs[disk[constants.IDISK_VG]] = \
9331 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
return vgs
9335 # Required free disk space as a function of disk and swap space
req_size_dict = {
9337 constants.DT_DISKLESS: {},
9338 constants.DT_PLAIN: _compute(disks, 0),
9339 # 128 MB are added for drbd metadata for each disk
9340 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9341 constants.DT_FILE: {},
9342 constants.DT_SHARED_FILE: {},
}
9345 if disk_template not in req_size_dict:
9346 raise errors.ProgrammerError("Disk template '%s' size requirement"
9347 " is unknown" % disk_template)
9349 return req_size_dict[disk_template]
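# Example (hypothetical disks): for two 1024 MiB DRBD disks in volume group
# "xenvg",
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_SIZE: 1024,
#                           constants.IDISK_VG: "xenvg"},
#                          {constants.IDISK_SIZE: 1024,
#                           constants.IDISK_VG: "xenvg"}])
# is meant to return a dict keyed by VG name with the space needed there,
# counting DRBD_META_SIZE of metadata on top of each disk's size.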
9352 def _ComputeDiskSize(disk_template, disks):
9353 """Compute disk size requirements according to disk template
9356 # Required free disk space as a function of disk and swap space
9357 req_size_dict = {
9358 constants.DT_DISKLESS: None,
9359 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9360 # 128 MB are added for drbd metadata for each disk
9361 constants.DT_DRBD8:
9362 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9363 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9364 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9365 constants.DT_BLOCK: 0,
9366 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9367 constants.DT_EXT: sum(d[constants.IDISK_SIZE] for d in disks),
9368 }
9370 if disk_template not in req_size_dict:
9371 raise errors.ProgrammerError("Disk template '%s' size requirement"
9372 " is unknown" % disk_template)
9374 return req_size_dict[disk_template]
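# Illustrative example: for the drbd template and disks of 1024 MB and
# 2048 MB this returns (1024 + 128) + (2048 + 128) = 3328 MB, whereas the
# plain template would return just 1024 + 2048 = 3072 MB.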
9377 def _FilterVmNodes(lu, nodenames):
9378 """Filters out non-vm_capable nodes from a list.
9380 @type lu: L{LogicalUnit}
9381 @param lu: the logical unit for which we check
9382 @type nodenames: list
9383 @param nodenames: the list of nodes on which we should check
9385 @return: the list of vm-capable nodes
9388 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9389 return [name for name in nodenames if name not in vm_nodes]
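# The result preserves the order of nodenames; only nodes the configuration
# marks as not vm_capable are dropped.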
9392 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9393 """Hypervisor parameter validation.
9395 This function abstracts the hypervisor parameter validation to be
9396 used in both instance create and instance modify.
9398 @type lu: L{LogicalUnit}
9399 @param lu: the logical unit for which we check
9400 @type nodenames: list
9401 @param nodenames: the list of nodes on which we should check
9402 @type hvname: string
9403 @param hvname: the name of the hypervisor we should use
9404 @type hvparams: dict
9405 @param hvparams: the parameters which we need to check
9406 @raise errors.OpPrereqError: if the parameters are not valid
9409 nodenames = _FilterVmNodes(lu, nodenames)
9411 cluster = lu.cfg.GetClusterInfo()
9412 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9414 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9415 for node in nodenames:
9416 info = hvinfo[node]
9417 if info.offline:
9418 continue
9419 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9422 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9423 """OS parameters validation.
9425 @type lu: L{LogicalUnit}
9426 @param lu: the logical unit for which we check
9427 @type required: boolean
9428 @param required: whether the validation should fail if the OS is not
9429 found
9430 @type nodenames: list
9431 @param nodenames: the list of nodes on which we should check
9432 @type osname: string
9433 @param osname: the name of the OS we should use
9434 @type osparams: dict
9435 @param osparams: the parameters which we need to check
9436 @raise errors.OpPrereqError: if the parameters are not valid
9439 nodenames = _FilterVmNodes(lu, nodenames)
9440 result = lu.rpc.call_os_validate(nodenames, required, osname,
9441 [constants.OS_VALIDATE_PARAMETERS],
9443 for node, nres in result.items():
9444 # we don't check for offline cases since this should be run only
9445 # against the master node and/or an instance's nodes
9446 nres.Raise("OS Parameters validation failed on node %s" % node)
9447 if not nres.payload:
9448 lu.LogInfo("OS %s not found on node %s, validation skipped",
9449 osname, node)
9452 class LUInstanceCreate(LogicalUnit):
9453 """Create an instance.
9456 HPATH = "instance-add"
9457 HTYPE = constants.HTYPE_INSTANCE
9460 def CheckArguments(self):
9464 # do not require name_check to ease forward/backward compatibility
9466 if self.op.no_install and self.op.start:
9467 self.LogInfo("No-installation mode selected, disabling startup")
9468 self.op.start = False
9469 # validate/normalize the instance name
9470 self.op.instance_name = \
9471 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9473 if self.op.ip_check and not self.op.name_check:
9474 # TODO: make the ip check more flexible and not depend on the name check
9475 raise errors.OpPrereqError("Cannot do IP address check without a name"
9476 " check", errors.ECODE_INVAL)
9478 # check nics' parameter names
9479 for nic in self.op.nics:
9480 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9482 # check disks. parameter names and consistent adopt/no-adopt strategy
9483 has_adopt = has_no_adopt = False
9484 for disk in self.op.disks:
9485 if self.op.disk_template != constants.DT_EXT:
9486 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9487 if constants.IDISK_ADOPT in disk:
9488 has_adopt = True
9489 else:
9490 has_no_adopt = True
9491 if has_adopt and has_no_adopt:
9492 raise errors.OpPrereqError("Either all disks are adopted or none is",
9493 errors.ECODE_INVAL)
9494 if has_adopt:
9495 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9496 raise errors.OpPrereqError("Disk adoption is not supported for the"
9497 " '%s' disk template" %
9498 self.op.disk_template,
9500 if self.op.iallocator is not None:
9501 raise errors.OpPrereqError("Disk adoption not allowed with an"
9502 " iallocator script", errors.ECODE_INVAL)
9503 if self.op.mode == constants.INSTANCE_IMPORT:
9504 raise errors.OpPrereqError("Disk adoption not allowed for"
9505 " instance import", errors.ECODE_INVAL)
9507 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9508 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9509 " but no 'adopt' parameter given" %
9510 self.op.disk_template,
9513 self.adopt_disks = has_adopt
9515 # instance name verification
9516 if self.op.name_check:
9517 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9518 self.op.instance_name = self.hostname1.name
9519 # used in CheckPrereq for ip ping check
9520 self.check_ip = self.hostname1.ip
9521 else:
9522 self.check_ip = None
9524 # file storage checks
9525 if (self.op.file_driver and
9526 not self.op.file_driver in constants.FILE_DRIVER):
9527 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9528 self.op.file_driver, errors.ECODE_INVAL)
9530 if self.op.disk_template == constants.DT_FILE:
9531 opcodes.RequireFileStorage()
9532 elif self.op.disk_template == constants.DT_SHARED_FILE:
9533 opcodes.RequireSharedFileStorage()
9535 ### Node/iallocator related checks
9536 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9538 if self.op.pnode is not None:
9539 if self.op.disk_template in constants.DTS_INT_MIRROR:
9540 if self.op.snode is None:
9541 raise errors.OpPrereqError("The networked disk templates need"
9542 " a mirror node", errors.ECODE_INVAL)
9544 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9545 " template")
9546 self.op.snode = None
9548 self._cds = _GetClusterDomainSecret()
9550 if self.op.mode == constants.INSTANCE_IMPORT:
9551 # On import force_variant must be True, because if we forced it at
9552 # initial install, our only chance when importing it back is that it
9554 self.op.force_variant = True
9556 if self.op.no_install:
9557 self.LogInfo("No-installation mode has no effect during import")
9559 elif self.op.mode == constants.INSTANCE_CREATE:
9560 if self.op.os_type is None:
9561 raise errors.OpPrereqError("No guest OS specified",
9563 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9564 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9565 " installation" % self.op.os_type,
9567 if self.op.disk_template is None:
9568 raise errors.OpPrereqError("No disk template specified",
9571 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9572 # Check handshake to ensure both clusters have the same domain secret
9573 src_handshake = self.op.source_handshake
9574 if not src_handshake:
9575 raise errors.OpPrereqError("Missing source handshake",
9578 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9581 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9584 # Load and check source CA
9585 self.source_x509_ca_pem = self.op.source_x509_ca
9586 if not self.source_x509_ca_pem:
9587 raise errors.OpPrereqError("Missing source X509 CA",
9591 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9593 except OpenSSL.crypto.Error, err:
9594 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9595 (err, ), errors.ECODE_INVAL)
9597 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9598 if errcode is not None:
9599 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9602 self.source_x509_ca = cert
9604 src_instance_name = self.op.source_instance_name
9605 if not src_instance_name:
9606 raise errors.OpPrereqError("Missing source instance name",
9609 self.source_instance_name = \
9610 netutils.GetHostname(name=src_instance_name).name
9613 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9614 self.op.mode, errors.ECODE_INVAL)
9616 def ExpandNames(self):
9617 """ExpandNames for CreateInstance.
9619 Figure out the right locks for instance creation.
9622 self.needed_locks = {}
9624 instance_name = self.op.instance_name
9625 # this is just a preventive check, but someone might still add this
9626 # instance in the meantime, and creation will fail at lock-add time
9627 if instance_name in self.cfg.GetInstanceList():
9628 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9629 instance_name, errors.ECODE_EXISTS)
9631 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9633 if self.op.iallocator:
9634 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9635 # specifying a group on instance creation and then selecting nodes from
9637 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9638 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9640 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9641 nodelist = [self.op.pnode]
9642 if self.op.snode is not None:
9643 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9644 nodelist.append(self.op.snode)
9645 self.needed_locks[locking.LEVEL_NODE] = nodelist
9646 # Lock resources of instance's primary and secondary nodes (copy to
9647 # prevent accidental modification)
9648 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9650 # in case of import lock the source node too
9651 if self.op.mode == constants.INSTANCE_IMPORT:
9652 src_node = self.op.src_node
9653 src_path = self.op.src_path
9655 if src_path is None:
9656 self.op.src_path = src_path = self.op.instance_name
9658 if src_node is None:
9659 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9660 self.op.src_node = None
9661 if os.path.isabs(src_path):
9662 raise errors.OpPrereqError("Importing an instance from a path"
9663 " requires a source node option",
9664 errors.ECODE_INVAL)
9665 else:
9666 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9667 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9668 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9669 if not os.path.isabs(src_path):
9670 self.op.src_path = src_path = \
9671 utils.PathJoin(constants.EXPORT_DIR, src_path)
9673 def _RunAllocator(self):
9674 """Run the allocator based on input opcode.
9677 #TODO Export network to iallocator so that it chooses a pnode
9678 # in a nodegroup that has the desired network connected to
9679 nics = [n.ToDict() for n in self.nics]
9680 ial = IAllocator(self.cfg, self.rpc,
9681 mode=constants.IALLOCATOR_MODE_ALLOC,
9682 name=self.op.instance_name,
9683 disk_template=self.op.disk_template,
9686 vcpus=self.be_full[constants.BE_VCPUS],
9687 memory=self.be_full[constants.BE_MAXMEM],
9688 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9691 hypervisor=self.op.hypervisor,
9694 ial.Run(self.op.iallocator)
9696 if not ial.success:
9697 raise errors.OpPrereqError("Can't compute nodes using"
9698 " iallocator '%s': %s" %
9699 (self.op.iallocator, ial.info),
9701 if len(ial.result) != ial.required_nodes:
9702 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9703 " of nodes (%s), required %s" %
9704 (self.op.iallocator, len(ial.result),
9705 ial.required_nodes), errors.ECODE_FAULT)
9706 self.op.pnode = ial.result[0]
9707 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9708 self.op.instance_name, self.op.iallocator,
9709 utils.CommaJoin(ial.result))
9710 if ial.required_nodes == 2:
9711 self.op.snode = ial.result[1]
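# The first node returned by the allocator becomes the primary node; when the
# allocator was asked for two nodes, the second one becomes the secondary.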
9713 def BuildHooksEnv(self):
9716 This runs on master, primary and secondary nodes of the instance.
9719 env = {
9720 "ADD_MODE": self.op.mode,
9721 }
9722 if self.op.mode == constants.INSTANCE_IMPORT:
9723 env["SRC_NODE"] = self.op.src_node
9724 env["SRC_PATH"] = self.op.src_path
9725 env["SRC_IMAGES"] = self.src_images
9727 env.update(_BuildInstanceHookEnv(
9728 name=self.op.instance_name,
9729 primary_node=self.op.pnode,
9730 secondary_nodes=self.secondaries,
9731 status=self.op.start,
9732 os_type=self.op.os_type,
9733 minmem=self.be_full[constants.BE_MINMEM],
9734 maxmem=self.be_full[constants.BE_MAXMEM],
9735 vcpus=self.be_full[constants.BE_VCPUS],
9736 nics=_NICListToTuple(self, self.nics),
9737 disk_template=self.op.disk_template,
9738 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9739 for d in self.disks],
9742 hypervisor_name=self.op.hypervisor,
9744 ))
9746 return env
9748 def BuildHooksNodes(self):
9749 """Build hooks nodes.
9752 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9753 return nl, nl
9755 def _ReadExportInfo(self):
9756 """Reads the export information from disk.
9758 It will override the opcode source node and path with the actual
9759 information, if these two were not specified before.
9761 @return: the export information
9764 assert self.op.mode == constants.INSTANCE_IMPORT
9766 src_node = self.op.src_node
9767 src_path = self.op.src_path
9769 if src_node is None:
9770 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9771 exp_list = self.rpc.call_export_list(locked_nodes)
9773 for node in exp_list:
9774 if exp_list[node].fail_msg:
9776 if src_path in exp_list[node].payload:
9778 self.op.src_node = src_node = node
9779 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9783 raise errors.OpPrereqError("No export found for relative path %s" %
9784 src_path, errors.ECODE_INVAL)
9786 _CheckNodeOnline(self, src_node)
9787 result = self.rpc.call_export_info(src_node, src_path)
9788 result.Raise("No export or invalid export found in dir %s" % src_path)
9790 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9791 if not export_info.has_section(constants.INISECT_EXP):
9792 raise errors.ProgrammerError("Corrupted export config",
9793 errors.ECODE_ENVIRON)
9795 ei_version = export_info.get(constants.INISECT_EXP, "version")
9796 if (int(ei_version) != constants.EXPORT_VERSION):
9797 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9798 (ei_version, constants.EXPORT_VERSION),
9799 errors.ECODE_ENVIRON)
9802 def _ReadExportParams(self, einfo):
9803 """Use export parameters as defaults.
9805 In case the opcode doesn't specify (as in override) some instance
9806 parameters, then try to use them from the export information, if
9807 that declares them.
9810 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9812 if self.op.disk_template is None:
9813 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9814 self.op.disk_template = einfo.get(constants.INISECT_INS,
9816 if self.op.disk_template not in constants.DISK_TEMPLATES:
9817 raise errors.OpPrereqError("Disk template specified in configuration"
9818 " file is not one of the allowed values:"
9819 " %s" % " ".join(constants.DISK_TEMPLATES))
9821 raise errors.OpPrereqError("No disk template specified and the export"
9822 " is missing the disk_template information",
9825 if not self.op.disks:
9827 # TODO: import the disk iv_name too
9828 for idx in range(constants.MAX_DISKS):
9829 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9830 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9831 disks.append({constants.IDISK_SIZE: disk_sz})
9832 self.op.disks = disks
9833 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9834 raise errors.OpPrereqError("No disk info specified and the export"
9835 " is missing the disk information",
9838 if not self.op.nics:
9840 for idx in range(constants.MAX_NICS):
9841 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9843 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9844 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9851 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9852 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9854 if (self.op.hypervisor is None and
9855 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9856 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9858 if einfo.has_section(constants.INISECT_HYP):
9859 # use the export parameters but do not override the ones
9860 # specified by the user
9861 for name, value in einfo.items(constants.INISECT_HYP):
9862 if name not in self.op.hvparams:
9863 self.op.hvparams[name] = value
9865 if einfo.has_section(constants.INISECT_BEP):
9866 # use the parameters, without overriding
9867 for name, value in einfo.items(constants.INISECT_BEP):
9868 if name not in self.op.beparams:
9869 self.op.beparams[name] = value
9870 # Compatibility for the old "memory" be param
9871 if name == constants.BE_MEMORY:
9872 if constants.BE_MAXMEM not in self.op.beparams:
9873 self.op.beparams[constants.BE_MAXMEM] = value
9874 if constants.BE_MINMEM not in self.op.beparams:
9875 self.op.beparams[constants.BE_MINMEM] = value
9877 # try to read the parameters old style, from the main section
9878 for name in constants.BES_PARAMETERS:
9879 if (name not in self.op.beparams and
9880 einfo.has_option(constants.INISECT_INS, name)):
9881 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9883 if einfo.has_section(constants.INISECT_OSP):
9884 # use the parameters, without overriding
9885 for name, value in einfo.items(constants.INISECT_OSP):
9886 if name not in self.op.osparams:
9887 self.op.osparams[name] = value
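# Precedence summary for imports: values given in the opcode always win; the
# export only fills in hypervisor, backend and OS parameters (and disks/NICs)
# that the user did not specify.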
9889 def _RevertToDefaults(self, cluster):
9890 """Revert the instance parameters to the default values.
9894 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9895 for name in self.op.hvparams.keys():
9896 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9897 del self.op.hvparams[name]
9899 be_defs = cluster.SimpleFillBE({})
9900 for name in self.op.beparams.keys():
9901 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9902 del self.op.beparams[name]
9904 nic_defs = cluster.SimpleFillNIC({})
9905 for nic in self.op.nics:
9906 for name in constants.NICS_PARAMETERS:
9907 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9910 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9911 for name in self.op.osparams.keys():
9912 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9913 del self.op.osparams[name]
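# After this the opcode only carries parameters that differ from the cluster
# defaults, so the new instance does not store redundant per-instance values.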
9915 def _CalculateFileStorageDir(self):
9916 """Calculate final instance file storage dir.
9919 # file storage dir calculation/check
9920 self.instance_file_storage_dir = None
9921 if self.op.disk_template in constants.DTS_FILEBASED:
9922 # build the full file storage dir path
9923 joinargs = []
9925 if self.op.disk_template == constants.DT_SHARED_FILE:
9926 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9927 else:
9928 get_fsd_fn = self.cfg.GetFileStorageDir
9930 cfg_storagedir = get_fsd_fn()
9931 if not cfg_storagedir:
9932 raise errors.OpPrereqError("Cluster file storage dir not defined")
9933 joinargs.append(cfg_storagedir)
9935 if self.op.file_storage_dir is not None:
9936 joinargs.append(self.op.file_storage_dir)
9938 joinargs.append(self.op.instance_name)
9940 # pylint: disable=W0142
9941 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
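# The resulting path is <cluster (shared) file storage dir>[/<opcode
# file_storage_dir>]/<instance name>, for example (illustrative only)
# /srv/ganeti/file-storage/inst1.example.com.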
9943 def CheckPrereq(self): # pylint: disable=R0914
9944 """Check prerequisites.
9947 self._CalculateFileStorageDir()
9949 if self.op.mode == constants.INSTANCE_IMPORT:
9950 export_info = self._ReadExportInfo()
9951 self._ReadExportParams(export_info)
9952 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9953 else:
9954 self._old_instance_name = None
9956 if (not self.cfg.GetVGName() and
9957 self.op.disk_template not in constants.DTS_NOT_LVM):
9958 raise errors.OpPrereqError("Cluster does not support lvm-based"
9959 " instances", errors.ECODE_STATE)
9961 if (self.op.hypervisor is None or
9962 self.op.hypervisor == constants.VALUE_AUTO):
9963 self.op.hypervisor = self.cfg.GetHypervisorType()
9965 cluster = self.cfg.GetClusterInfo()
9966 enabled_hvs = cluster.enabled_hypervisors
9967 if self.op.hypervisor not in enabled_hvs:
9968 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9969 " cluster (%s)" % (self.op.hypervisor,
9970 ",".join(enabled_hvs)),
9973 # Check tag validity
9974 for tag in self.op.tags:
9975 objects.TaggableObject.ValidateTag(tag)
9977 # check hypervisor parameter syntax (locally)
9978 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9979 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9981 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9982 hv_type.CheckParameterSyntax(filled_hvp)
9983 self.hv_full = filled_hvp
9984 # check that we don't specify global parameters on an instance
9985 _CheckGlobalHvParams(self.op.hvparams)
9987 # fill and remember the beparams dict
9988 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9989 for param, value in self.op.beparams.iteritems():
9990 if value == constants.VALUE_AUTO:
9991 self.op.beparams[param] = default_beparams[param]
9992 objects.UpgradeBeParams(self.op.beparams)
9993 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9994 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9996 # build os parameters
9997 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9999 # now that hvp/bep are in final format, let's reset to defaults,
10001 if self.op.identify_defaults:
10002 self._RevertToDefaults(cluster)
10004 self.hotplug_info = None
10005 if self.op.hotplug:
10006 self.hotplug_info = objects.HotplugInfo(disks=0, nics=0,
10007 pci_pool=list(range(16,32)))
10009 self.nics = []
10010 for idx, nic in enumerate(self.op.nics):
10011 nic_mode_req = nic.get(constants.INIC_MODE, None)
10012 nic_mode = nic_mode_req
10013 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
10014 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
10016 net = nic.get(constants.INIC_NETWORK, None)
10017 link = nic.get(constants.NIC_LINK, None)
10018 ip = nic.get(constants.INIC_IP, None)
10020 if net is None or net.lower() == constants.VALUE_NONE:
10021 net = None
10022 else:
10023 if nic_mode_req is not None or link is not None:
10024 raise errors.OpPrereqError("If network is given, no mode or link"
10025 " is allowed to be passed",
10026 errors.ECODE_INVAL)
10028 # ip validity checks
10029 if ip is None or ip.lower() == constants.VALUE_NONE:
10030 nic_ip = None
10031 elif ip.lower() == constants.VALUE_AUTO:
10032 if not self.op.name_check:
10033 raise errors.OpPrereqError("IP address set to auto but name checks"
10034 " have been skipped",
10035 errors.ECODE_INVAL)
10036 nic_ip = self.hostname1.ip
10037 else:
10038 # We defer pool operations until later, so that the iallocator has
10039 # filled in the instance's node(s)
10040 if ip.lower() == constants.NIC_IP_POOL:
10041 if net is None:
10042 raise errors.OpPrereqError("if ip=pool, parameter network"
10043 " must be passed too",
10044 errors.ECODE_INVAL)
10046 elif not netutils.IPAddress.IsValid(ip):
10047 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
10048 errors.ECODE_INVAL)
10052 # TODO: check the ip address for uniqueness
10053 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
10054 raise errors.OpPrereqError("Routed nic mode requires an ip address",
10055 errors.ECODE_INVAL)
10057 # MAC address verification
10058 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10059 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10060 mac = utils.NormalizeAndValidateMac(mac)
10062 try:
10063 self.cfg.ReserveMAC(mac, self.proc.GetECId())
10064 except errors.ReservationError:
10065 raise errors.OpPrereqError("MAC address %s already in use"
10066 " in cluster" % mac,
10067 errors.ECODE_NOTUNIQUE)
10069 # Build nic parameters
10070 nicparams = {}
10071 if nic_mode_req:
10072 nicparams[constants.NIC_MODE] = nic_mode
10073 if link:
10074 nicparams[constants.NIC_LINK] = link
10076 check_params = cluster.SimpleFillNIC(nicparams)
10077 objects.NIC.CheckParameterSyntax(check_params)
10078 nic_idx, pci = _GetPCIInfo(self, 'nics')
10079 self.nics.append(objects.NIC(idx=nic_idx, pci=pci,
10080 mac=mac, ip=nic_ip, network=net,
10081 nicparams=check_params))
10083 # disk checks/pre-build
10084 default_vg = self.cfg.GetVGName()
10085 self.disks = []
10086 for disk in self.op.disks:
10087 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10088 if mode not in constants.DISK_ACCESS_SET:
10089 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10090 mode, errors.ECODE_INVAL)
10091 size = disk.get(constants.IDISK_SIZE, None)
10092 if size is None:
10093 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10094 try:
10095 size = int(size)
10096 except (TypeError, ValueError):
10097 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10098 errors.ECODE_INVAL)
10100 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10101 if ext_provider and self.op.disk_template != constants.DT_EXT:
10102 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10103 " disk template, not %s" %
10104 (constants.IDISK_PROVIDER, constants.DT_EXT,
10105 self.op.disk_template), errors.ECODE_INVAL)
10107 data_vg = disk.get(constants.IDISK_VG, default_vg)
10108 new_disk = {
10109 constants.IDISK_SIZE: size,
10110 constants.IDISK_MODE: mode,
10111 constants.IDISK_VG: data_vg,
10112 }
10114 if constants.IDISK_METAVG in disk:
10115 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10116 if constants.IDISK_ADOPT in disk:
10117 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10119 # For extstorage, demand the `provider' option and add any
10120 # additional parameters (ext-params) to the dict
10121 if self.op.disk_template == constants.DT_EXT:
10122 if ext_provider:
10123 new_disk[constants.IDISK_PROVIDER] = ext_provider
10124 for key in disk:
10125 if key not in constants.IDISK_PARAMS:
10126 new_disk[key] = disk[key]
10127 else:
10128 raise errors.OpPrereqError("Missing provider for template '%s'" %
10129 constants.DT_EXT, errors.ECODE_INVAL)
10131 self.disks.append(new_disk)
10133 if self.op.mode == constants.INSTANCE_IMPORT:
10134 disk_images = []
10135 for idx in range(len(self.disks)):
10136 option = "disk%d_dump" % idx
10137 if export_info.has_option(constants.INISECT_INS, option):
10138 # FIXME: are the old os-es, disk sizes, etc. useful?
10139 export_name = export_info.get(constants.INISECT_INS, option)
10140 image = utils.PathJoin(self.op.src_path, export_name)
10141 disk_images.append(image)
10142 else:
10143 disk_images.append(False)
10145 self.src_images = disk_images
10147 if self.op.instance_name == self._old_instance_name:
10148 for idx, nic in enumerate(self.nics):
10149 if nic.mac == constants.VALUE_AUTO:
10150 nic_mac_ini = "nic%d_mac" % idx
10151 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10153 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10155 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10156 if self.op.ip_check:
10157 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10158 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10159 (self.check_ip, self.op.instance_name),
10160 errors.ECODE_NOTUNIQUE)
10162 #### mac address generation
10163 # By generating here the mac address both the allocator and the hooks get
10164 # the real final mac address rather than the 'auto' or 'generate' value.
10165 # There is a race condition between the generation and the instance object
10166 # creation, which means that we know the mac is valid now, but we're not
10167 # sure it will be when we actually add the instance. If things go bad
10168 # adding the instance will abort because of a duplicate mac, and the
10169 # creation job will fail.
10170 for nic in self.nics:
10171 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10172 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10176 if self.op.iallocator is not None:
10177 self._RunAllocator()
10179 # Release all unneeded node locks
10180 _ReleaseLocks(self, locking.LEVEL_NODE,
10181 keep=filter(None, [self.op.pnode, self.op.snode,
10182 self.op.src_node]))
10183 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10184 keep=filter(None, [self.op.pnode, self.op.snode,
10185 self.op.src_node]))
10187 #### node related checks
10189 # check primary node
10190 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10191 assert self.pnode is not None, \
10192 "Cannot retrieve locked node %s" % self.op.pnode
10193 if pnode.offline:
10194 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10195 pnode.name, errors.ECODE_STATE)
10196 if pnode.drained:
10197 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10198 pnode.name, errors.ECODE_STATE)
10199 if not pnode.vm_capable:
10200 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10201 " '%s'" % pnode.name, errors.ECODE_STATE)
10203 self.secondaries = []
10205 # Fill in any IPs from IP pools. This must happen here, because we need to
10206 # know the nic's primary node, as specified by the iallocator
10207 for idx, nic in enumerate(self.nics):
10208 net = nic.network
10209 if net is not None:
10210 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10211 if netparams is None:
10212 raise errors.OpPrereqError("No netparams found for network"
10213 " %s, probably not connected to"
10214 " node %s's nodegroup" %
10215 (net, self.pnode.name),
10216 errors.ECODE_INVAL)
10217 self.LogInfo("NIC/%d inherits netparams %s" %
10218 (idx, netparams.values()))
10219 nic.nicparams = dict(netparams)
10220 if nic.ip is not None:
10221 filled_params = cluster.SimpleFillNIC(nic.nicparams)
10222 if nic.ip.lower() == constants.NIC_IP_POOL:
10223 try:
10224 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10225 except errors.ReservationError:
10226 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10227 " from the address pool" % idx,
10228 errors.ECODE_STATE)
10229 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10230 else:
10231 try:
10232 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10233 except errors.ReservationError:
10234 raise errors.OpPrereqError("IP address %s already in use"
10235 " or does not belong to network %s" %
10236 (nic.ip, net),
10237 errors.ECODE_NOTUNIQUE)
10238 else:
10239 # net is None, ip None or given
10240 if self.op.conflicts_check:
10241 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10244 # mirror node verification
10245 if self.op.disk_template in constants.DTS_INT_MIRROR:
10246 if self.op.snode == pnode.name:
10247 raise errors.OpPrereqError("The secondary node cannot be the"
10248 " primary node", errors.ECODE_INVAL)
10249 _CheckNodeOnline(self, self.op.snode)
10250 _CheckNodeNotDrained(self, self.op.snode)
10251 _CheckNodeVmCapable(self, self.op.snode)
10252 self.secondaries.append(self.op.snode)
10254 snode = self.cfg.GetNodeInfo(self.op.snode)
10255 if pnode.group != snode.group:
10256 self.LogWarning("The primary and secondary nodes are in two"
10257 " different node groups; the disk parameters"
10258 " from the first disk's node group will be"
10261 nodenames = [pnode.name] + self.secondaries
10263 # Verify instance specs
10264 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10265 ispec = {
10266 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10267 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10268 constants.ISPEC_DISK_COUNT: len(self.disks),
10269 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10270 constants.ISPEC_NIC_COUNT: len(self.nics),
10271 constants.ISPEC_SPINDLE_USE: spindle_use,
10272 }
10274 group_info = self.cfg.GetNodeGroup(pnode.group)
10275 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10276 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10277 if not self.op.ignore_ipolicy and res:
10278 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10279 " policy: %s") % (pnode.group,
10280 utils.CommaJoin(res)),
10281 errors.ECODE_INVAL)
10283 if not self.adopt_disks:
10284 if self.op.disk_template == constants.DT_RBD:
10285 # _CheckRADOSFreeSpace() is just a placeholder.
10286 # Any function that checks prerequisites can be placed here.
10287 # Check if there is enough space on the RADOS cluster.
10288 _CheckRADOSFreeSpace()
10289 elif self.op.disk_template == constants.DT_EXT:
10290 # FIXME: Function that checks prereqs if needed
10291 pass
10292 else:
10293 # Check lv size requirements, if not adopting
10294 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10295 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10297 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10298 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10299 disk[constants.IDISK_ADOPT])
10300 for disk in self.disks])
10301 if len(all_lvs) != len(self.disks):
10302 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10303 errors.ECODE_INVAL)
10304 for lv_name in all_lvs:
10305 try:
10306 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10307 # to ReserveLV use the same syntax
10308 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10309 except errors.ReservationError:
10310 raise errors.OpPrereqError("LV named %s used by another instance" %
10311 lv_name, errors.ECODE_NOTUNIQUE)
10313 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10314 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10316 node_lvs = self.rpc.call_lv_list([pnode.name],
10317 vg_names.payload.keys())[pnode.name]
10318 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10319 node_lvs = node_lvs.payload
10321 delta = all_lvs.difference(node_lvs.keys())
10322 if delta:
10323 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10324 utils.CommaJoin(delta),
10325 errors.ECODE_INVAL)
10326 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10327 if online_lvs:
10328 raise errors.OpPrereqError("Online logical volumes found, cannot"
10329 " adopt: %s" % utils.CommaJoin(online_lvs),
10330 errors.ECODE_STATE)
10331 # update the size of disk based on what is found
10332 for dsk in self.disks:
10333 dsk[constants.IDISK_SIZE] = \
10334 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10335 dsk[constants.IDISK_ADOPT])][0]))
10337 elif self.op.disk_template == constants.DT_BLOCK:
10338 # Normalize and de-duplicate device paths
10339 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10340 for disk in self.disks])
10341 if len(all_disks) != len(self.disks):
10342 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10343 errors.ECODE_INVAL)
10344 baddisks = [d for d in all_disks
10345 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10346 if baddisks:
10347 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10348 " cannot be adopted" %
10349 (", ".join(baddisks),
10350 constants.ADOPTABLE_BLOCKDEV_ROOT),
10351 errors.ECODE_INVAL)
10353 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10354 list(all_disks))[pnode.name]
10355 node_disks.Raise("Cannot get block device information from node %s" %
10356 pnode.name)
10357 node_disks = node_disks.payload
10358 delta = all_disks.difference(node_disks.keys())
10359 if delta:
10360 raise errors.OpPrereqError("Missing block device(s): %s" %
10361 utils.CommaJoin(delta),
10362 errors.ECODE_INVAL)
10363 for dsk in self.disks:
10364 dsk[constants.IDISK_SIZE] = \
10365 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
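# For both adoption flavours (logical volumes and block devices) the sizes
# given in the opcode are replaced by the sizes actually reported by the
# primary node, truncated to an integer number of megabytes.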
10367 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10369 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10370 # check OS parameters (remotely)
10371 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10373 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10375 #TODO: _CheckExtParams (remotely)
10376 # Check parameters for extstorage
10378 # memory check on primary node
10379 #TODO(dynmem): use MINMEM for checking
10381 _CheckNodeFreeMemory(self, self.pnode.name,
10382 "creating instance %s" % self.op.instance_name,
10383 self.be_full[constants.BE_MAXMEM],
10384 self.op.hypervisor)
10386 self.dry_run_result = list(nodenames)
10388 def Exec(self, feedback_fn):
10389 """Create and add the instance to the cluster.
10392 instance = self.op.instance_name
10393 pnode_name = self.pnode.name
10395 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10396 self.owned_locks(locking.LEVEL_NODE)), \
10397 "Node locks differ from node resource locks"
10399 ht_kind = self.op.hypervisor
10400 if ht_kind in constants.HTS_REQ_PORT:
10401 network_port = self.cfg.AllocatePort()
10403 network_port = None
10405 # This is ugly but we got a chicken-egg problem here
10406 # We can only take the group disk parameters, as the instance
10407 # has no disks yet (we are generating them right here).
10408 node = self.cfg.GetNodeInfo(pnode_name)
10409 nodegroup = self.cfg.GetNodeGroup(node.group)
10410 disks = _GenerateDiskTemplate(self,
10411 self.op.disk_template,
10412 instance, pnode_name,
10415 self.instance_file_storage_dir,
10416 self.op.file_driver,
10419 self.cfg.GetGroupDiskParams(nodegroup))
10421 iobj = objects.Instance(name=instance, os=self.op.os_type,
10422 primary_node=pnode_name,
10423 nics=self.nics, disks=disks,
10424 disk_template=self.op.disk_template,
10425 admin_state=constants.ADMINST_DOWN,
10426 network_port=network_port,
10427 beparams=self.op.beparams,
10428 hvparams=self.op.hvparams,
10429 hypervisor=self.op.hypervisor,
10430 osparams=self.op.osparams,
10431 hotplug_info=self.hotplug_info,
10435 for tag in self.op.tags:
10436 iobj.AddTag(tag)
10438 if self.adopt_disks:
10439 if self.op.disk_template == constants.DT_PLAIN:
10440 # rename LVs to the newly-generated names; we need to construct
10441 # 'fake' LV disks with the old data, plus the new unique_id
10442 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10443 rename_to = []
10444 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10445 rename_to.append(t_dsk.logical_id)
10446 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10447 self.cfg.SetDiskID(t_dsk, pnode_name)
10448 result = self.rpc.call_blockdev_rename(pnode_name,
10449 zip(tmp_disks, rename_to))
10450 result.Raise("Failed to rename adopted LVs")
10451 else:
10452 feedback_fn("* creating instance disks...")
10453 try:
10454 _CreateDisks(self, iobj)
10455 except errors.OpExecError:
10456 self.LogWarning("Device creation failed, reverting...")
10457 try:
10458 _RemoveDisks(self, iobj)
10459 finally:
10460 self.cfg.ReleaseDRBDMinors(instance)
10461 raise
10463 feedback_fn("adding instance %s to cluster config" % instance)
10465 self.cfg.AddInstance(iobj, self.proc.GetECId())
10467 # Declare that we don't want to remove the instance lock anymore, as we've
10468 # added the instance to the config
10469 del self.remove_locks[locking.LEVEL_INSTANCE]
10471 if self.op.mode == constants.INSTANCE_IMPORT:
10472 # Release unused nodes
10473 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10475 # Release all nodes
10476 _ReleaseLocks(self, locking.LEVEL_NODE)
10478 disk_abort = False
10479 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10480 feedback_fn("* wiping instance disks...")
10481 try:
10482 _WipeDisks(self, iobj)
10483 except errors.OpExecError, err:
10484 logging.exception("Wiping disks failed")
10485 self.LogWarning("Wiping instance disks failed (%s)", err)
10486 disk_abort = True
10488 if disk_abort:
10489 # Something is already wrong with the disks, don't do anything else
10490 pass
10491 elif self.op.wait_for_sync:
10492 disk_abort = not _WaitForSync(self, iobj)
10493 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10494 # make sure the disks are not degraded (still sync-ing is ok)
10495 feedback_fn("* checking mirrors status")
10496 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10497 else:
10498 disk_abort = False
10500 if disk_abort:
10501 _RemoveDisks(self, iobj)
10502 self.cfg.RemoveInstance(iobj.name)
10503 # Make sure the instance lock gets removed
10504 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10505 raise errors.OpExecError("There are some degraded disks for"
10506 " this instance")
10508 # Release all node resource locks
10509 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10511 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10512 # we need to set the disks ID to the primary node, since the
10513 # preceding code might or might have not done it, depending on
10514 # disk template and other options
10515 for disk in iobj.disks:
10516 self.cfg.SetDiskID(disk, pnode_name)
10517 if self.op.mode == constants.INSTANCE_CREATE:
10518 if not self.op.no_install:
10519 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10520 not self.op.wait_for_sync)
10521 if pause_sync:
10522 feedback_fn("* pausing disk sync to install instance OS")
10523 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10526 for idx, success in enumerate(result.payload):
10527 if not success:
10528 logging.warn("pause-sync of instance %s for disk %d failed",
10529 instance, idx)
10531 feedback_fn("* running the instance OS create scripts...")
10532 # FIXME: pass debug option from opcode to backend
10533 os_add_result = \
10534 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10535 self.op.debug_level)
10536 if pause_sync:
10537 feedback_fn("* resuming disk sync")
10538 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10541 for idx, success in enumerate(result.payload):
10542 if not success:
10543 logging.warn("resume-sync of instance %s for disk %d failed",
10544 instance, idx)
10546 os_add_result.Raise("Could not add os for instance %s"
10547 " on node %s" % (instance, pnode_name))
10550 if self.op.mode == constants.INSTANCE_IMPORT:
10551 feedback_fn("* running the instance OS import scripts...")
10553 transfers = []
10555 for idx, image in enumerate(self.src_images):
10556 if not image:
10557 continue
10559 # FIXME: pass debug option from opcode to backend
10560 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10561 constants.IEIO_FILE, (image, ),
10562 constants.IEIO_SCRIPT,
10563 (iobj.disks[idx], idx),
10565 transfers.append(dt)
10567 import_result = \
10568 masterd.instance.TransferInstanceData(self, feedback_fn,
10569 self.op.src_node, pnode_name,
10570 self.pnode.secondary_ip,
10571 iobj, transfers)
10572 if not compat.all(import_result):
10573 self.LogWarning("Some disks for instance %s on node %s were not"
10574 " imported successfully" % (instance, pnode_name))
10576 rename_from = self._old_instance_name
10578 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10579 feedback_fn("* preparing remote import...")
10580 # The source cluster will stop the instance before attempting to make
10581 # a connection. In some cases stopping an instance can take a long
10582 # time, hence the shutdown timeout is added to the connection
10584 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10585 self.op.source_shutdown_timeout)
10586 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10588 assert iobj.primary_node == self.pnode.name
10589 disk_results = \
10590 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10591 self.source_x509_ca,
10592 self._cds, timeouts)
10593 if not compat.all(disk_results):
10594 # TODO: Should the instance still be started, even if some disks
10595 # failed to import (valid for local imports, too)?
10596 self.LogWarning("Some disks for instance %s on node %s were not"
10597 " imported successfully" % (instance, pnode_name))
10599 rename_from = self.source_instance_name
10601 else:
10602 # also checked in the prereq part
10603 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10604 % self.op.mode)
10606 # Run rename script on newly imported instance
10607 assert iobj.name == instance
10608 feedback_fn("Running rename script for %s" % instance)
10609 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10610 rename_from,
10611 self.op.debug_level)
10612 if result.fail_msg:
10613 self.LogWarning("Failed to run rename script for %s on node"
10614 " %s: %s" % (instance, pnode_name, result.fail_msg))
10616 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10618 if self.op.start:
10619 iobj.admin_state = constants.ADMINST_UP
10620 self.cfg.Update(iobj, feedback_fn)
10621 logging.info("Starting instance %s on node %s", instance, pnode_name)
10622 feedback_fn("* starting instance...")
10623 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10625 result.Raise("Could not start instance")
10627 return list(iobj.all_nodes)
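# Exec returns the list of all nodes used by the new instance; this list is
# the result of the opcode.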
10630 def _CheckRADOSFreeSpace():
10631 """Compute disk size requirements inside the RADOS cluster.
10634 # For the RADOS cluster we assume there is always enough space.
10635 pass
10638 class LUInstanceConsole(NoHooksLU):
10639 """Connect to an instance's console.
10641 This is somewhat special in that it returns the command line that
10642 you need to run on the master node in order to connect to the
10643 console.
10648 def ExpandNames(self):
10649 self.share_locks = _ShareAll()
10650 self._ExpandAndLockInstance()
10652 def CheckPrereq(self):
10653 """Check prerequisites.
10655 This checks that the instance is in the cluster.
10658 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10659 assert self.instance is not None, \
10660 "Cannot retrieve locked instance %s" % self.op.instance_name
10661 _CheckNodeOnline(self, self.instance.primary_node)
10663 def Exec(self, feedback_fn):
10664 """Connect to the console of an instance
10667 instance = self.instance
10668 node = instance.primary_node
10670 node_insts = self.rpc.call_instance_list([node],
10671 [instance.hypervisor])[node]
10672 node_insts.Raise("Can't get node information from %s" % node)
10674 if instance.name not in node_insts.payload:
10675 if instance.admin_state == constants.ADMINST_UP:
10676 state = constants.INSTST_ERRORDOWN
10677 elif instance.admin_state == constants.ADMINST_DOWN:
10678 state = constants.INSTST_ADMINDOWN
10680 state = constants.INSTST_ADMINOFFLINE
10681 raise errors.OpExecError("Instance %s is not running (state %s)" %
10682 (instance.name, state))
10684 logging.debug("Connecting to console of %s on %s", instance.name, node)
10686 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10689 def _GetInstanceConsole(cluster, instance):
10690 """Returns console information for an instance.
10692 @type cluster: L{objects.Cluster}
10693 @type instance: L{objects.Instance}
10697 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10698 # beparams and hvparams are passed separately, to avoid editing the
10699 # instance and then saving the defaults in the instance itself.
10700 hvparams = cluster.FillHV(instance)
10701 beparams = cluster.FillBE(instance)
10702 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10704 assert console.instance == instance.name
10705 assert console.Validate()
10707 return console.ToDict()
10710 class LUInstanceReplaceDisks(LogicalUnit):
10711 """Replace the disks of an instance.
10714 HPATH = "mirrors-replace"
10715 HTYPE = constants.HTYPE_INSTANCE
10718 def CheckArguments(self):
10719 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10720 self.op.iallocator)
10722 def ExpandNames(self):
10723 self._ExpandAndLockInstance()
10725 assert locking.LEVEL_NODE not in self.needed_locks
10726 assert locking.LEVEL_NODE_RES not in self.needed_locks
10727 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10729 assert self.op.iallocator is None or self.op.remote_node is None, \
10730 "Conflicting options"
10732 if self.op.remote_node is not None:
10733 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10735 # Warning: do not remove the locking of the new secondary here
10736 # unless DRBD8.AddChildren is changed to work in parallel;
10737 # currently it doesn't since parallel invocations of
10738 # FindUnusedMinor will conflict
10739 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10740 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10741 else:
10742 self.needed_locks[locking.LEVEL_NODE] = []
10743 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10745 if self.op.iallocator is not None:
10746 # iallocator will select a new node in the same group
10747 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10749 self.needed_locks[locking.LEVEL_NODE_RES] = []
10751 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10752 self.op.iallocator, self.op.remote_node,
10753 self.op.disks, False, self.op.early_release,
10754 self.op.ignore_ipolicy)
10756 self.tasklets = [self.replacer]
10758 def DeclareLocks(self, level):
10759 if level == locking.LEVEL_NODEGROUP:
10760 assert self.op.remote_node is None
10761 assert self.op.iallocator is not None
10762 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10764 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10765 # Lock all groups used by instance optimistically; this requires going
10766 # via the node before it's locked, requiring verification later on
10767 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10768 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10770 elif level == locking.LEVEL_NODE:
10771 if self.op.iallocator is not None:
10772 assert self.op.remote_node is None
10773 assert not self.needed_locks[locking.LEVEL_NODE]
10775 # Lock member nodes of all locked groups
10776 self.needed_locks[locking.LEVEL_NODE] = [node_name
10777 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10778 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10779 else:
10780 self._LockInstancesNodes()
10781 elif level == locking.LEVEL_NODE_RES:
10783 self.needed_locks[locking.LEVEL_NODE_RES] = \
10784 self.needed_locks[locking.LEVEL_NODE]
10786 def BuildHooksEnv(self):
10787 """Build hooks env.
10789 This runs on the master, the primary and all the secondaries.
10792 instance = self.replacer.instance
10793 env = {
10794 "MODE": self.op.mode,
10795 "NEW_SECONDARY": self.op.remote_node,
10796 "OLD_SECONDARY": instance.secondary_nodes[0],
10797 }
10798 env.update(_BuildInstanceHookEnvByObject(self, instance))
10800 return env
10801 def BuildHooksNodes(self):
10802 """Build hooks nodes.
10805 instance = self.replacer.instance
10806 nl = [
10807 self.cfg.GetMasterNode(),
10808 instance.primary_node,
10809 ]
10810 if self.op.remote_node is not None:
10811 nl.append(self.op.remote_node)
10813 return nl, nl
10814 def CheckPrereq(self):
10815 """Check prerequisites.
10818 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10819 self.op.iallocator is None)
10821 # Verify if node group locks are still correct
10822 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10823 if owned_groups:
10824 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10826 return LogicalUnit.CheckPrereq(self)
10829 class TLReplaceDisks(Tasklet):
10830 """Replaces disks for an instance.
10832 Note: Locking is not within the scope of this class.
10835 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10836 disks, delay_iallocator, early_release, ignore_ipolicy):
10837 """Initializes this class.
10840 Tasklet.__init__(self, lu)
10842 # Parameters
10843 self.instance_name = instance_name
10844 self.mode = mode
10845 self.iallocator_name = iallocator_name
10846 self.remote_node = remote_node
10847 self.disks = disks
10848 self.delay_iallocator = delay_iallocator
10849 self.early_release = early_release
10850 self.ignore_ipolicy = ignore_ipolicy
10853 self.instance = None
10854 self.new_node = None
10855 self.target_node = None
10856 self.other_node = None
10857 self.remote_node_info = None
10858 self.node_secondary_ip = None
10861 def CheckArguments(mode, remote_node, iallocator):
10862 """Helper function for users of this class.
10865 # check for valid parameter combination
10866 if mode == constants.REPLACE_DISK_CHG:
10867 if remote_node is None and iallocator is None:
10868 raise errors.OpPrereqError("When changing the secondary either an"
10869 " iallocator script must be used or the"
10870 " new node given", errors.ECODE_INVAL)
10872 if remote_node is not None and iallocator is not None:
10873 raise errors.OpPrereqError("Give either the iallocator or the new"
10874 " secondary, not both", errors.ECODE_INVAL)
10876 elif remote_node is not None or iallocator is not None:
10877 # Not replacing the secondary
10878 raise errors.OpPrereqError("The iallocator and new node options can"
10879 " only be used when changing the"
10880 " secondary node", errors.ECODE_INVAL)
10883 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10884 """Compute a new secondary node using an IAllocator.
10887 ial = IAllocator(lu.cfg, lu.rpc,
10888 mode=constants.IALLOCATOR_MODE_RELOC,
10889 name=instance_name,
10890 relocate_from=list(relocate_from))
10892 ial.Run(iallocator_name)
10894 if not ial.success:
10895 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10896 " %s" % (iallocator_name, ial.info),
10897 errors.ECODE_NORES)
10899 if len(ial.result) != ial.required_nodes:
10900 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10901 " of nodes (%s), required %s" %
10903 len(ial.result), ial.required_nodes),
10904 errors.ECODE_FAULT)
10906 remote_node_name = ial.result[0]
10908 lu.LogInfo("Selected new secondary for instance '%s': %s",
10909 instance_name, remote_node_name)
10911 return remote_node_name
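# In relocation mode the allocator is expected to return exactly one node,
# which will be used as the new secondary for the DRBD disks.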
10913 def _FindFaultyDisks(self, node_name):
10914 """Wrapper for L{_FindFaultyInstanceDisks}.
10917 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10920 def _CheckDisksActivated(self, instance):
10921 """Checks if the instance disks are activated.
10923 @param instance: The instance to check disks
10924 @return: True if they are activated, False otherwise
10927 nodes = instance.all_nodes
10929 for idx, dev in enumerate(instance.disks):
10930 for node in nodes:
10931 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10932 self.cfg.SetDiskID(dev, node)
10934 result = _BlockdevFind(self, node, dev, instance)
10936 if result.offline:
10937 continue
10938 elif result.fail_msg or not result.payload:
10939 return False
10941 return True
10943 def CheckPrereq(self):
10944 """Check prerequisites.
10946 This checks that the instance is in the cluster.
10949 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10950 assert instance is not None, \
10951 "Cannot retrieve locked instance %s" % self.instance_name
10953 if instance.disk_template != constants.DT_DRBD8:
10954 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10955 " instances", errors.ECODE_INVAL)
10957 if len(instance.secondary_nodes) != 1:
10958 raise errors.OpPrereqError("The instance has a strange layout,"
10959 " expected one secondary but found %d" %
10960 len(instance.secondary_nodes),
10961 errors.ECODE_FAULT)
10963 if not self.delay_iallocator:
10964 self._CheckPrereq2()
10966 def _CheckPrereq2(self):
10967 """Check prerequisites, second part.
10969 This function should always be part of CheckPrereq. It was separated and is
10970 now called from Exec because during node evacuation iallocator was only
10971 called with an unmodified cluster model, not taking planned changes into
10972 account.
10975 instance = self.instance
10976 secondary_node = instance.secondary_nodes[0]
10978 if self.iallocator_name is None:
10979 remote_node = self.remote_node
10980 else:
10981 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10982 instance.name, instance.secondary_nodes)
10984 if remote_node is None:
10985 self.remote_node_info = None
10986 else:
10987 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10988 "Remote node '%s' is not locked" % remote_node
10990 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10991 assert self.remote_node_info is not None, \
10992 "Cannot retrieve locked node %s" % remote_node
10994 if remote_node == self.instance.primary_node:
10995 raise errors.OpPrereqError("The specified node is the primary node of"
10996 " the instance", errors.ECODE_INVAL)
10998 if remote_node == secondary_node:
10999 raise errors.OpPrereqError("The specified node is already the"
11000 " secondary node of the instance",
11001 errors.ECODE_INVAL)
11003 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11004 constants.REPLACE_DISK_CHG):
11005 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11006 errors.ECODE_INVAL)
11008 if self.mode == constants.REPLACE_DISK_AUTO:
11009 if not self._CheckDisksActivated(instance):
11010 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11011 " first" % self.instance_name,
11012 errors.ECODE_STATE)
11013 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11014 faulty_secondary = self._FindFaultyDisks(secondary_node)
11016 if faulty_primary and faulty_secondary:
11017 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11018 " one node and can not be repaired"
11019 " automatically" % self.instance_name,
11020 errors.ECODE_STATE)
11022 if faulty_primary:
11023 self.disks = faulty_primary
11024 self.target_node = instance.primary_node
11025 self.other_node = secondary_node
11026 check_nodes = [self.target_node, self.other_node]
11027 elif faulty_secondary:
11028 self.disks = faulty_secondary
11029 self.target_node = secondary_node
11030 self.other_node = instance.primary_node
11031 check_nodes = [self.target_node, self.other_node]
11032 else:
11033 self.disks = []
11034 check_nodes = []
11036 else:
11037 # Non-automatic modes
11038 if self.mode == constants.REPLACE_DISK_PRI:
11039 self.target_node = instance.primary_node
11040 self.other_node = secondary_node
11041 check_nodes = [self.target_node, self.other_node]
11043 elif self.mode == constants.REPLACE_DISK_SEC:
11044 self.target_node = secondary_node
11045 self.other_node = instance.primary_node
11046 check_nodes = [self.target_node, self.other_node]
11048 elif self.mode == constants.REPLACE_DISK_CHG:
11049 self.new_node = remote_node
11050 self.other_node = instance.primary_node
11051 self.target_node = secondary_node
11052 check_nodes = [self.new_node, self.other_node]
11054 _CheckNodeNotDrained(self.lu, remote_node)
11055 _CheckNodeVmCapable(self.lu, remote_node)
11057 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11058 assert old_node_info is not None
11059 if old_node_info.offline and not self.early_release:
11060 # doesn't make sense to delay the release
11061 self.early_release = True
11062 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11063 " early-release mode", secondary_node)
11065 else:
11066 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11067 self.mode)
11069 # If not specified, all disks should be replaced
11070 if not self.disks:
11071 self.disks = range(len(self.instance.disks))
11073 # TODO: This is ugly, but right now we can't distinguish between internally
11074 # submitted opcodes and external ones. We should fix that.
11075 if self.remote_node_info:
11076 # We change the node; let's verify that it still meets the instance policy
11077 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11078 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
11079 new_group_info)
11080 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11081 ignore=self.ignore_ipolicy)
11083 for node in check_nodes:
11084 _CheckNodeOnline(self.lu, node)
11086 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11087 self.other_node,
11088 self.target_node]
11089 if node_name is not None)
11091 # Release unneeded node and node resource locks
11092 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11093 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11095 # Release any owned node group
11096 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11097 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11099 # Check whether disks are valid
11100 for disk_idx in self.disks:
11101 instance.FindDisk(disk_idx)
11103 # Get secondary node IP addresses
11104 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11105 in self.cfg.GetMultiNodeInfo(touched_nodes))
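# Sketch of the resulting mapping (hypothetical node names and addresses):
#   self.node_secondary_ip == {"node1.example.com": "192.0.2.1",
#                              "node2.example.com": "192.0.2.2"}
# i.e. every node touched by the replacement mapped to its secondary IP; the
# DRBD disconnect/attach RPCs below use these addresses to build endpoints.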
11107 def Exec(self, feedback_fn):
11108 """Execute disk replacement.
11110 This dispatches the disk replacement to the appropriate handler.
11113 if self.delay_iallocator:
11114 self._CheckPrereq2()
11117 # Verify owned locks before starting operation
11118 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11119 assert set(owned_nodes) == set(self.node_secondary_ip), \
11120 ("Incorrect node locks, owning %s, expected %s" %
11121 (owned_nodes, self.node_secondary_ip.keys()))
11122 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11123 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11125 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11126 assert list(owned_instances) == [self.instance_name], \
11127 "Instance '%s' not locked" % self.instance_name
11129 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11130 "Should not own any node group lock at this point"
11132 if not self.disks:
11133 feedback_fn("No disks need replacement")
11134 return
11136 feedback_fn("Replacing disk(s) %s for %s" %
11137 (utils.CommaJoin(self.disks), self.instance.name))
11139 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11141 # Activate the instance disks if we're replacing them on a down instance
11142 if activate_disks:
11143 _StartInstanceDisks(self.lu, self.instance, True)
11146 # Should we replace the secondary node?
11147 if self.new_node is not None:
11148 fn = self._ExecDrbd8Secondary
11149 else:
11150 fn = self._ExecDrbd8DiskOnly
11152 result = fn(feedback_fn)
11154 # Deactivate the instance disks if we're replacing them on a
11155 # down instance
11156 if activate_disks:
11157 _SafeShutdownInstanceDisks(self.lu, self.instance)
11159 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11162 # Verify owned locks
11163 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11164 nodes = frozenset(self.node_secondary_ip)
11165 assert ((self.early_release and not owned_nodes) or
11166 (not self.early_release and not (set(owned_nodes) - nodes))), \
11167 ("Not owning the correct locks, early_release=%s, owned=%r,"
11168 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11172 def _CheckVolumeGroup(self, nodes):
11173 self.lu.LogInfo("Checking volume groups")
11175 vgname = self.cfg.GetVGName()
11177 # Make sure volume group exists on all involved nodes
11178 results = self.rpc.call_vg_list(nodes)
11179 if not results:
11180 raise errors.OpExecError("Can't list volume groups on the nodes")
11182 for node in nodes:
11183 res = results[node]
11184 res.Raise("Error checking node %s" % node)
11185 if vgname not in res.payload:
11186 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11187 (vgname, node))
11189 def _CheckDisksExistence(self, nodes):
11190 # Check disk existence
11191 for idx, dev in enumerate(self.instance.disks):
11192 if idx not in self.disks:
11196 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11197 self.cfg.SetDiskID(dev, node)
11199 result = _BlockdevFind(self, node, dev, self.instance)
11201 msg = result.fail_msg
11202 if msg or not result.payload:
11204 msg = "disk not found"
11205 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11208 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11209 for idx, dev in enumerate(self.instance.disks):
11210 if idx not in self.disks:
11213 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11216 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11217 on_primary, ldisk=ldisk):
11218 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11219 " replace disks for instance %s" %
11220 (node_name, self.instance.name))
11222 def _CreateNewStorage(self, node_name):
11223 """Create new storage on the primary or secondary node.
11225 This is only used for same-node replaces, not for changing the
11226 secondary node, hence we don't want to modify the existing disk.
11229 iv_names = {}
11231 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11232 for idx, dev in enumerate(disks):
11233 if idx not in self.disks:
11236 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11238 self.cfg.SetDiskID(dev, node_name)
11240 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11241 names = _GenerateUniqueNames(self.lu, lv_names)
11243 (data_disk, meta_disk) = dev.children
11244 vg_data = data_disk.logical_id[0]
11245 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11246 logical_id=(vg_data, names[0]),
11247 params=data_disk.params)
11248 vg_meta = meta_disk.logical_id[0]
11249 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11250 logical_id=(vg_meta, names[1]),
11251 params=meta_disk.params)
11253 new_lvs = [lv_data, lv_meta]
11254 old_lvs = [child.Copy() for child in dev.children]
11255 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11257 # we pass force_create=True to force the LVM creation
11258 for new_lv in new_lvs:
11259 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11260 _GetInstanceInfoText(self.instance), False)
11262 return iv_names
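# Minimal sketch of the mapping built above (hypothetical LV names):
#   iv_names == {"disk/0": (drbd_disk,
#                           [old_data_lv, old_meta_lv],   # LVs currently attached
#                           [new_data_lv, new_meta_lv])}  # freshly created LVs
# _CheckDevices() and _RemoveOldStorage() below consume exactly this
# (dev, old_lvs, new_lvs) layout.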
11264 def _CheckDevices(self, node_name, iv_names):
11265 for name, (dev, _, _) in iv_names.iteritems():
11266 self.cfg.SetDiskID(dev, node_name)
11268 result = _BlockdevFind(self, node_name, dev, self.instance)
11270 msg = result.fail_msg
11271 if msg or not result.payload:
11273 msg = "disk not found"
11274 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11277 if result.payload.is_degraded:
11278 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11280 def _RemoveOldStorage(self, node_name, iv_names):
11281 for name, (_, old_lvs, _) in iv_names.iteritems():
11282 self.lu.LogInfo("Remove logical volumes for %s" % name)
11284 for lv in old_lvs:
11285 self.cfg.SetDiskID(lv, node_name)
11287 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11288 if msg:
11289 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11290 hint="remove unused LVs manually")
11292 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11293 """Replace a disk on the primary or secondary for DRBD 8.
11295 The algorithm for replace is quite complicated:
11297 1. for each disk to be replaced:
11299 1. create new LVs on the target node with unique names
11300 1. detach old LVs from the drbd device
11301 1. rename old LVs to name_replaced.<time_t>
11302 1. rename new LVs to old LVs
11303 1. attach the new LVs (with the old names now) to the drbd device
11305 1. wait for sync across all devices
11307 1. for each modified disk:
11309 1. remove old LVs (which have the name name_replaced.<time_t>)
11311 Failures are not very well handled.
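As an illustrative sketch with hypothetical volume group and LV names,
replacing disk/0 whose data LV is xenvg/aaaa.disk0_data roughly does:

  xenvg/bbbb.disk0_data created with a unique name
  xenvg/aaaa.disk0_data renamed to xenvg/aaaa.disk0_data_replaced-1400000000
  xenvg/bbbb.disk0_data renamed to xenvg/aaaa.disk0_data and re-attached to
  the DRBD device; the *_replaced-<time_t> volumes are removed after the sync.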
11314 steps_total = 6
11316 # Step: check device activation
11317 self.lu.LogStep(1, steps_total, "Check device existence")
11318 self._CheckDisksExistence([self.other_node, self.target_node])
11319 self._CheckVolumeGroup([self.target_node, self.other_node])
11321 # Step: check other node consistency
11322 self.lu.LogStep(2, steps_total, "Check peer consistency")
11323 self._CheckDisksConsistency(self.other_node,
11324 self.other_node == self.instance.primary_node,
11327 # Step: create new storage
11328 self.lu.LogStep(3, steps_total, "Allocate new storage")
11329 iv_names = self._CreateNewStorage(self.target_node)
11331 # Step: for each lv, detach+rename*2+attach
11332 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11333 for dev, old_lvs, new_lvs in iv_names.itervalues():
11334 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11336 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11338 result.Raise("Can't detach drbd from local storage on node"
11339 " %s for device %s" % (self.target_node, dev.iv_name))
11341 #cfg.Update(instance)
11343 # ok, we created the new LVs, so now we know we have the needed
11344 # storage; as such, we proceed on the target node to rename
11345 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11346 # using the assumption that logical_id == physical_id (which in
11347 # turn is the unique_id on that node)
11349 # FIXME(iustin): use a better name for the replaced LVs
11350 temp_suffix = int(time.time())
11351 ren_fn = lambda d, suff: (d.physical_id[0],
11352 d.physical_id[1] + "_replaced-%s" % suff)
11354 # Build the rename list based on what LVs exist on the node
11355 rename_old_to_new = []
11356 for to_ren in old_lvs:
11357 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11358 if not result.fail_msg and result.payload:
11360 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11362 self.lu.LogInfo("Renaming the old LVs on the target node")
11363 result = self.rpc.call_blockdev_rename(self.target_node,
11364 rename_old_to_new)
11365 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11367 # Now we rename the new LVs to the old LVs
11368 self.lu.LogInfo("Renaming the new LVs on the target node")
11369 rename_new_to_old = [(new, old.physical_id)
11370 for old, new in zip(old_lvs, new_lvs)]
11371 result = self.rpc.call_blockdev_rename(self.target_node,
11372 rename_new_to_old)
11373 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11375 # Intermediate steps of in memory modifications
11376 for old, new in zip(old_lvs, new_lvs):
11377 new.logical_id = old.logical_id
11378 self.cfg.SetDiskID(new, self.target_node)
11380 # We need to modify old_lvs so that removal later removes the
11381 # right LVs, not the newly added ones; note that old_lvs is a
11383 for disk in old_lvs:
11384 disk.logical_id = ren_fn(disk, temp_suffix)
11385 self.cfg.SetDiskID(disk, self.target_node)
11387 # Now that the new lvs have the old name, we can add them to the device
11388 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11389 result = self.rpc.call_blockdev_addchildren(self.target_node,
11390 (dev, self.instance), new_lvs)
11391 msg = result.fail_msg
11392 if msg:
11393 for new_lv in new_lvs:
11394 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11395 new_lv).fail_msg
11396 if msg2:
11397 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11398 hint=("cleanup manually the unused logical"
11399 " volumes"))
11400 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11402 cstep = itertools.count(5)
11404 if self.early_release:
11405 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11406 self._RemoveOldStorage(self.target_node, iv_names)
11407 # TODO: Check if releasing locks early still makes sense
11408 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11410 # Release all resource locks except those used by the instance
11411 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11412 keep=self.node_secondary_ip.keys())
11414 # Release all node locks while waiting for sync
11415 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11417 # TODO: Can the instance lock be downgraded here? Take the optional disk
11418 # shutdown in the caller into consideration.
11421 # This can fail as the old devices are degraded and _WaitForSync
11422 # does a combined result over all disks, so we don't check its return value
11423 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11424 _WaitForSync(self.lu, self.instance)
11426 # Check all devices manually
11427 self._CheckDevices(self.instance.primary_node, iv_names)
11429 # Step: remove old storage
11430 if not self.early_release:
11431 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11432 self._RemoveOldStorage(self.target_node, iv_names)
11434 def _ExecDrbd8Secondary(self, feedback_fn):
11435 """Replace the secondary node for DRBD 8.
11437 The algorithm for replace is quite complicated:
11438 - for all disks of the instance:
11439 - create new LVs on the new node with same names
11440 - shutdown the drbd device on the old secondary
11441 - disconnect the drbd network on the primary
11442 - create the drbd device on the new secondary
11443 - network attach the drbd on the primary, using an artifice:
11444 the drbd code for Attach() will connect to the network if it
11445 finds a device which is connected to the good local disks but
11446 not network enabled
11447 - wait for sync across all devices
11448 - remove all disks from the old secondary
11450 Failures are not very well handled.
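Sketch of the logical_id handling with hypothetical values: an existing disk
carries (pnode, old_snode, 11000, p_minor, s_minor, secret). The device is
first created on the new secondary with
(pnode, new_snode, None, p_minor, new_minor, secret), i.e. without a port, so
it comes up without networking; the configuration is later switched to
(pnode, new_snode, 11000, p_minor, new_minor, secret) before drbd_attach_net
reconnects the primary and the new secondary.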
11453 steps_total = 6
11455 pnode = self.instance.primary_node
11457 # Step: check device activation
11458 self.lu.LogStep(1, steps_total, "Check device existence")
11459 self._CheckDisksExistence([self.instance.primary_node])
11460 self._CheckVolumeGroup([self.instance.primary_node])
11462 # Step: check other node consistency
11463 self.lu.LogStep(2, steps_total, "Check peer consistency")
11464 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11466 # Step: create new storage
11467 self.lu.LogStep(3, steps_total, "Allocate new storage")
11468 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11469 for idx, dev in enumerate(disks):
11470 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11471 (self.new_node, idx))
11472 # we pass force_create=True to force LVM creation
11473 for new_lv in dev.children:
11474 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11475 True, _GetInstanceInfoText(self.instance), False)
11477 # Step 4: drbd minors and drbd setup changes
11478 # after this, we must manually remove the drbd minors on both the
11479 # error and the success paths
11480 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11481 minors = self.cfg.AllocateDRBDMinor([self.new_node
11482 for dev in self.instance.disks],
11483 self.instance.name)
11484 logging.debug("Allocated minors %r", minors)
11486 iv_names = {}
11487 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11488 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11489 (self.new_node, idx))
11490 # create new devices on new_node; note that we create two IDs:
11491 # one without port, so the drbd will be activated without
11492 # networking information on the new node at this stage, and one
11493 # with network, for the latter activation in step 4
11494 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11495 if self.instance.primary_node == o_node1:
11496 p_minor = o_minor1
11497 else:
11498 assert self.instance.primary_node == o_node2, "Three-node instance?"
11499 p_minor = o_minor2
11501 new_alone_id = (self.instance.primary_node, self.new_node, None,
11502 p_minor, new_minor, o_secret)
11503 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11504 p_minor, new_minor, o_secret)
11506 iv_names[idx] = (dev, dev.children, new_net_id)
11507 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11508 new_net_id)
11509 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11510 logical_id=new_alone_id,
11511 children=dev.children,
11512 size=dev.size,
11513 params={})
11514 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11515 self.cfg)
11516 try:
11517 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11518 anno_new_drbd,
11519 _GetInstanceInfoText(self.instance), False)
11520 except errors.GenericError:
11521 self.cfg.ReleaseDRBDMinors(self.instance.name)
11522 raise
11524 # We have new devices, shutdown the drbd on the old secondary
11525 for idx, dev in enumerate(self.instance.disks):
11526 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11527 self.cfg.SetDiskID(dev, self.target_node)
11528 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11529 (dev, self.instance)).fail_msg
11530 if msg:
11531 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11532 " node: %s" % (idx, msg),
11533 hint=("Please cleanup this device manually as"
11534 " soon as possible"))
11536 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11537 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11538 self.instance.disks)[pnode]
11540 msg = result.fail_msg
11541 if msg:
11542 # detaches didn't succeed (unlikely)
11543 self.cfg.ReleaseDRBDMinors(self.instance.name)
11544 raise errors.OpExecError("Can't detach the disks from the network on"
11545 " old node: %s" % (msg,))
11547 # if we managed to detach at least one, we update all the disks of
11548 # the instance to point to the new secondary
11549 self.lu.LogInfo("Updating instance configuration")
11550 for dev, _, new_logical_id in iv_names.itervalues():
11551 dev.logical_id = new_logical_id
11552 self.cfg.SetDiskID(dev, self.instance.primary_node)
11554 self.cfg.Update(self.instance, feedback_fn)
11556 # Release all node locks (the configuration has been updated)
11557 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11559 # and now perform the drbd attach
11560 self.lu.LogInfo("Attaching primary drbds to new secondary"
11561 " (standalone => connected)")
11562 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11563 self.new_node],
11564 self.node_secondary_ip,
11565 (self.instance.disks, self.instance),
11566 self.instance.name,
11567 False)
11568 for to_node, to_result in result.items():
11569 msg = to_result.fail_msg
11570 if msg:
11571 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11572 to_node, msg,
11573 hint=("please do a gnt-instance info to see the"
11574 " status of disks"))
11576 cstep = itertools.count(5)
11578 if self.early_release:
11579 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11580 self._RemoveOldStorage(self.target_node, iv_names)
11581 # TODO: Check if releasing locks early still makes sense
11582 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11584 # Release all resource locks except those used by the instance
11585 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11586 keep=self.node_secondary_ip.keys())
11588 # TODO: Can the instance lock be downgraded here? Take the optional disk
11589 # shutdown in the caller into consideration.
11592 # This can fail as the old devices are degraded and _WaitForSync
11593 # does a combined result over all disks, so we don't check its return value
11594 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11595 _WaitForSync(self.lu, self.instance)
11597 # Check all devices manually
11598 self._CheckDevices(self.instance.primary_node, iv_names)
11600 # Step: remove old storage
11601 if not self.early_release:
11602 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11603 self._RemoveOldStorage(self.target_node, iv_names)
11606 class LURepairNodeStorage(NoHooksLU):
11607 """Repairs the volume group on a node.
11612 def CheckArguments(self):
11613 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11615 storage_type = self.op.storage_type
11617 if (constants.SO_FIX_CONSISTENCY not in
11618 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11619 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11620 " repaired" % storage_type,
11621 errors.ECODE_INVAL)
11623 def ExpandNames(self):
11624 self.needed_locks = {
11625 locking.LEVEL_NODE: [self.op.node_name],
11628 def _CheckFaultyDisks(self, instance, node_name):
11629 """Ensure faulty disks abort the opcode or at least warn."""
11630 try:
11631 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11632 node_name, True):
11633 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11634 " node '%s'" % (instance.name, node_name),
11635 errors.ECODE_STATE)
11636 except errors.OpPrereqError, err:
11637 if self.op.ignore_consistency:
11638 self.proc.LogWarning(str(err.args[0]))
11639 else:
11640 raise
11642 def CheckPrereq(self):
11643 """Check prerequisites.
11646 # Check whether any instance on this node has faulty disks
11647 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11648 if inst.admin_state != constants.ADMINST_UP:
11649 continue
11650 check_nodes = set(inst.all_nodes)
11651 check_nodes.discard(self.op.node_name)
11652 for inst_node_name in check_nodes:
11653 self._CheckFaultyDisks(inst, inst_node_name)
11655 def Exec(self, feedback_fn):
11656 feedback_fn("Repairing storage unit '%s' on %s ..." %
11657 (self.op.name, self.op.node_name))
11659 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11660 result = self.rpc.call_storage_execute(self.op.node_name,
11661 self.op.storage_type, st_args,
11663 constants.SO_FIX_CONSISTENCY)
11664 result.Raise("Failed to repair storage unit '%s' on %s" %
11665 (self.op.name, self.op.node_name))
11668 class LUNodeEvacuate(NoHooksLU):
11669 """Evacuates instances off a list of nodes.
11674 _MODE2IALLOCATOR = {
11675 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11676 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11677 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11679 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11680 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11681 constants.IALLOCATOR_NEVAC_MODES)
11683 def CheckArguments(self):
11684 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11686 def ExpandNames(self):
11687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11689 if self.op.remote_node is not None:
11690 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11691 assert self.op.remote_node
11693 if self.op.remote_node == self.op.node_name:
11694 raise errors.OpPrereqError("Can not use evacuated node as a new"
11695 " secondary node", errors.ECODE_INVAL)
11697 if self.op.mode != constants.NODE_EVAC_SEC:
11698 raise errors.OpPrereqError("Without the use of an iallocator only"
11699 " secondary instances can be evacuated",
11700 errors.ECODE_INVAL)
11703 self.share_locks = _ShareAll()
11704 self.needed_locks = {
11705 locking.LEVEL_INSTANCE: [],
11706 locking.LEVEL_NODEGROUP: [],
11707 locking.LEVEL_NODE: [],
11710 # Determine nodes (via group) optimistically, needs verification once locks
11711 # have been acquired
11712 self.lock_nodes = self._DetermineNodes()
11714 def _DetermineNodes(self):
11715 """Gets the list of nodes to operate on.
11718 if self.op.remote_node is None:
11719 # Iallocator will choose any node(s) in the same group
11720 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11721 else:
11722 group_nodes = frozenset([self.op.remote_node])
11724 # Determine nodes to be locked
11725 return set([self.op.node_name]) | group_nodes
11727 def _DetermineInstances(self):
11728 """Builds list of instances to operate on.
11731 assert self.op.mode in constants.NODE_EVAC_MODES
11733 if self.op.mode == constants.NODE_EVAC_PRI:
11734 # Primary instances only
11735 inst_fn = _GetNodePrimaryInstances
11736 assert self.op.remote_node is None, \
11737 "Evacuating primary instances requires iallocator"
11738 elif self.op.mode == constants.NODE_EVAC_SEC:
11739 # Secondary instances only
11740 inst_fn = _GetNodeSecondaryInstances
11741 else:
11742 # All instances
11743 assert self.op.mode == constants.NODE_EVAC_ALL
11744 inst_fn = _GetNodeInstances
11745 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11747 raise errors.OpPrereqError("Due to an issue with the iallocator"
11748 " interface it is not possible to evacuate"
11749 " all instances at once; specify explicitly"
11750 " whether to evacuate primary or secondary"
11752 errors.ECODE_INVAL)
11754 return inst_fn(self.cfg, self.op.node_name)
11756 def DeclareLocks(self, level):
11757 if level == locking.LEVEL_INSTANCE:
11758 # Lock instances optimistically, needs verification once node and group
11759 # locks have been acquired
11760 self.needed_locks[locking.LEVEL_INSTANCE] = \
11761 set(i.name for i in self._DetermineInstances())
11763 elif level == locking.LEVEL_NODEGROUP:
11764 # Lock node groups for all potential target nodes optimistically, needs
11765 # verification once nodes have been acquired
11766 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11767 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11769 elif level == locking.LEVEL_NODE:
11770 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11772 def CheckPrereq(self):
11774 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11775 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11776 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11778 need_nodes = self._DetermineNodes()
11780 if not owned_nodes.issuperset(need_nodes):
11781 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11782 " locks were acquired, current nodes are"
11783 " '%s', used to be '%s'; retry the"
11784 " operation" %
11785 (self.op.node_name,
11786 utils.CommaJoin(need_nodes),
11787 utils.CommaJoin(owned_nodes)),
11788 errors.ECODE_STATE)
11790 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11791 if owned_groups != wanted_groups:
11792 raise errors.OpExecError("Node groups changed since locks were acquired,"
11793 " current groups are '%s', used to be '%s';"
11794 " retry the operation" %
11795 (utils.CommaJoin(wanted_groups),
11796 utils.CommaJoin(owned_groups)))
11798 # Determine affected instances
11799 self.instances = self._DetermineInstances()
11800 self.instance_names = [i.name for i in self.instances]
11802 if set(self.instance_names) != owned_instances:
11803 raise errors.OpExecError("Instances on node '%s' changed since locks"
11804 " were acquired, current instances are '%s',"
11805 " used to be '%s'; retry the operation" %
11806 (self.op.node_name,
11807 utils.CommaJoin(self.instance_names),
11808 utils.CommaJoin(owned_instances)))
11810 if self.instance_names:
11811 self.LogInfo("Evacuating instances from node '%s': %s",
11812 self.op.node_name,
11813 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11814 else:
11815 self.LogInfo("No instances to evacuate from node '%s'",
11816 self.op.node_name)
11818 if self.op.remote_node is not None:
11819 for i in self.instances:
11820 if i.primary_node == self.op.remote_node:
11821 raise errors.OpPrereqError("Node %s is the primary node of"
11822 " instance %s, cannot use it as"
11823 " a new secondary node" %
11824 (self.op.remote_node, i.name),
11825 errors.ECODE_INVAL)
11827 def Exec(self, feedback_fn):
11828 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11830 if not self.instance_names:
11831 # No instances to evacuate
11832 jobs = []
11834 elif self.op.iallocator is not None:
11835 # TODO: Implement relocation to other group
11836 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11837 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11838 instances=list(self.instance_names))
11840 ial.Run(self.op.iallocator)
11842 if not ial.success:
11843 raise errors.OpPrereqError("Can't compute node evacuation using"
11844 " iallocator '%s': %s" %
11845 (self.op.iallocator, ial.info),
11846 errors.ECODE_NORES)
11848 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11850 elif self.op.remote_node is not None:
11851 assert self.op.mode == constants.NODE_EVAC_SEC
11852 jobs = [
11853 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11854 remote_node=self.op.remote_node,
11855 disks=[],
11856 mode=constants.REPLACE_DISK_CHG,
11857 early_release=self.op.early_release)]
11858 for instance_name in self.instance_names
11859 ]
11861 else:
11862 raise errors.ProgrammerError("No iallocator or remote node")
11864 return ResultWithJobs(jobs)
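# Minimal sketch of the value returned in the remote_node case (hypothetical
# instance names): one single-opcode job per affected instance, e.g.
#   jobs == [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
#            [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# so each instance is evacuated by its own job.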
11867 def _SetOpEarlyRelease(early_release, op):
11868 """Sets C{early_release} flag on opcodes if available.
11871 try:
11872 op.early_release = early_release
11873 except AttributeError:
11874 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11876 return op
11879 def _NodeEvacDest(use_nodes, group, nodes):
11880 """Returns group or nodes depending on caller's choice.
11883 if use_nodes:
11884 return utils.CommaJoin(nodes)
11885 else:
11886 return group
11889 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11890 """Unpacks the result of change-group and node-evacuate iallocator requests.
11892 Handles iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11893 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11895 @type lu: L{LogicalUnit}
11896 @param lu: Logical unit instance
11897 @type alloc_result: tuple/list
11898 @param alloc_result: Result from iallocator
11899 @type early_release: bool
11900 @param early_release: Whether to release locks early if possible
11901 @type use_nodes: bool
11902 @param use_nodes: Whether to display node names instead of groups
11905 (moved, failed, jobs) = alloc_result
11907 if failed:
11908 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11909 for (name, reason) in failed)
11910 lu.LogWarning("Unable to evacuate instances %s", failreason)
11911 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11913 if moved:
11914 lu.LogInfo("Instances to be moved: %s",
11915 utils.CommaJoin("%s (to %s)" %
11916 (name, _NodeEvacDest(use_nodes, group, nodes))
11917 for (name, group, nodes) in moved))
11919 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11920 map(opcodes.OpCode.LoadOpCode, ops))
11921 for ops in jobs]
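# Illustrative alloc_result as unpacked above (hypothetical values):
#   moved  = [("inst1", "group1", ["node3", "node4"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[<serialized opcode dict>, ...], ...]
# Every inner list of serialized opcodes becomes one job; OpCode.LoadOpCode
# rebuilds the opcode objects and _SetOpEarlyRelease propagates the
# early_release flag where the opcode supports it.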
11924 class LUInstanceGrowDisk(LogicalUnit):
11925 """Grow a disk of an instance.
11928 HPATH = "disk-grow"
11929 HTYPE = constants.HTYPE_INSTANCE
11932 def ExpandNames(self):
11933 self._ExpandAndLockInstance()
11934 self.needed_locks[locking.LEVEL_NODE] = []
11935 self.needed_locks[locking.LEVEL_NODE_RES] = []
11936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11937 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11939 def DeclareLocks(self, level):
11940 if level == locking.LEVEL_NODE:
11941 self._LockInstancesNodes()
11942 elif level == locking.LEVEL_NODE_RES:
11944 self.needed_locks[locking.LEVEL_NODE_RES] = \
11945 self.needed_locks[locking.LEVEL_NODE][:]
11947 def BuildHooksEnv(self):
11948 """Build hooks env.
11950 This runs on the master, the primary and all the secondaries.
11953 env = {
11954 "DISK": self.op.disk,
11955 "AMOUNT": self.op.amount,
11956 "ABSOLUTE": self.op.absolute,
11957 }
11958 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11959 return env
11961 def BuildHooksNodes(self):
11962 """Build hooks nodes.
11965 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11966 return (nl, nl)
11968 def CheckPrereq(self):
11969 """Check prerequisites.
11971 This checks that the instance is in the cluster.
11974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11975 assert instance is not None, \
11976 "Cannot retrieve locked instance %s" % self.op.instance_name
11977 nodenames = list(instance.all_nodes)
11978 for node in nodenames:
11979 _CheckNodeOnline(self, node)
11981 self.instance = instance
11983 if instance.disk_template not in constants.DTS_GROWABLE:
11984 raise errors.OpPrereqError("Instance's disk layout does not support"
11985 " growing", errors.ECODE_INVAL)
11987 self.disk = instance.FindDisk(self.op.disk)
11989 if self.op.absolute:
11990 self.target = self.op.amount
11991 self.delta = self.target - self.disk.size
11992 if self.delta < 0:
11993 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11994 "current disk size (%s)" %
11995 (utils.FormatUnit(self.target, "h"),
11996 utils.FormatUnit(self.disk.size, "h")),
11997 errors.ECODE_STATE)
11998 else:
11999 self.delta = self.op.amount
12000 self.target = self.disk.size + self.delta
12001 if self.delta < 0:
12002 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12003 utils.FormatUnit(self.delta, "h"),
12004 errors.ECODE_INVAL)
12006 if instance.disk_template not in (constants.DT_FILE,
12007 constants.DT_SHARED_FILE,
12008 constants.DT_EXT):
12010 # TODO: check the free disk space for file, when that feature will be
12011 # supported
12012 _CheckNodesFreeDiskPerVG(self, nodenames,
12013 self.disk.ComputeGrowth(self.delta))
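# Worked example of the size computation above (hypothetical values, in MiB):
# with a 10240 MiB disk,
#   absolute=False, amount=2048  -> delta=2048,  target=12288
#   absolute=True,  amount=20480 -> delta=10240, target=20480
# and a negative delta is rejected in either branch.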
12015 def Exec(self, feedback_fn):
12016 """Execute disk grow.
12019 instance = self.instance
12020 disk = self.disk
12022 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12023 assert (self.owned_locks(locking.LEVEL_NODE) ==
12024 self.owned_locks(locking.LEVEL_NODE_RES))
12026 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12027 if not disks_ok:
12028 raise errors.OpExecError("Cannot activate block device to grow")
12030 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12031 (self.op.disk, instance.name,
12032 utils.FormatUnit(self.delta, "h"),
12033 utils.FormatUnit(self.target, "h")))
12035 # First run all grow ops in dry-run mode
12036 for node in instance.all_nodes:
12037 self.cfg.SetDiskID(disk, node)
12038 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12040 result.Raise("Grow request failed to node %s" % node)
12042 # We know that (as far as we can test) operations across different
12043 # nodes will succeed, time to run it for real
12044 for node in instance.all_nodes:
12045 self.cfg.SetDiskID(disk, node)
12046 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12048 result.Raise("Grow request failed to node %s" % node)
12050 # TODO: Rewrite code to work properly
12051 # DRBD goes into sync mode for a short amount of time after executing the
12052 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
12053 # calling "resize" in sync mode fails. Sleeping for a short amount of
12054 # time is a work-around.
12055 time.sleep(5)
12057 disk.RecordGrow(self.delta)
12058 self.cfg.Update(instance, feedback_fn)
12060 # Changes have been recorded, release node lock
12061 _ReleaseLocks(self, locking.LEVEL_NODE)
12063 # Downgrade lock while waiting for sync
12064 self.glm.downgrade(locking.LEVEL_INSTANCE)
12066 if self.op.wait_for_sync:
12067 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12068 if disk_abort:
12069 self.proc.LogWarning("Disk sync-ing has not returned a good"
12070 " status; please check the instance")
12071 if instance.admin_state != constants.ADMINST_UP:
12072 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12073 elif instance.admin_state != constants.ADMINST_UP:
12074 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12075 " not supposed to be running because no wait for"
12076 " sync mode was requested")
12078 assert self.owned_locks(locking.LEVEL_NODE_RES)
12079 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12082 class LUInstanceQueryData(NoHooksLU):
12083 """Query runtime instance data.
12088 def ExpandNames(self):
12089 self.needed_locks = {}
12091 # Use locking if requested or when non-static information is wanted
12092 if not (self.op.static or self.op.use_locking):
12093 self.LogWarning("Non-static data requested, locks need to be acquired")
12094 self.op.use_locking = True
12096 if self.op.instances or not self.op.use_locking:
12097 # Expand instance names right here
12098 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12099 else:
12100 # Will use acquired locks
12101 self.wanted_names = None
12103 if self.op.use_locking:
12104 self.share_locks = _ShareAll()
12106 if self.wanted_names is None:
12107 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12108 else:
12109 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12111 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12112 self.needed_locks[locking.LEVEL_NODE] = []
12113 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12115 def DeclareLocks(self, level):
12116 if self.op.use_locking:
12117 if level == locking.LEVEL_NODEGROUP:
12118 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12120 # Lock all groups used by instances optimistically; this requires going
12121 # via the node before it's locked, requiring verification later on
12122 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12123 frozenset(group_uuid
12124 for instance_name in owned_instances
12125 for group_uuid in
12126 self.cfg.GetInstanceNodeGroups(instance_name))
12128 elif level == locking.LEVEL_NODE:
12129 self._LockInstancesNodes()
12131 def CheckPrereq(self):
12132 """Check prerequisites.
12134 This only checks the optional instance list against the existing names.
12137 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12138 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12139 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12141 if self.wanted_names is None:
12142 assert self.op.use_locking, "Locking was not used"
12143 self.wanted_names = owned_instances
12145 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12147 if self.op.use_locking:
12148 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12151 assert not (owned_instances or owned_groups or owned_nodes)
12153 self.wanted_instances = instances.values()
12155 def _ComputeBlockdevStatus(self, node, instance, dev):
12156 """Returns the status of a block device
12159 if self.op.static or not node:
12160 return None
12162 self.cfg.SetDiskID(dev, node)
12164 result = self.rpc.call_blockdev_find(node, dev)
12168 result.Raise("Can't compute disk status for %s" % instance.name)
12170 status = result.payload
12174 return (status.dev_path, status.major, status.minor,
12175 status.sync_percent, status.estimated_time,
12176 status.is_degraded, status.ldisk_status)
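# Sketch of the returned tuple (hypothetical values):
#   ("/dev/drbd0", 147, 0, 99.5, 3, False, "ok")
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); None is returned when only static data was requested or the
# node could not be queried.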
12178 def _ComputeDiskStatus(self, instance, snode, dev):
12179 """Compute block device status.
12182 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12184 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12186 def _ComputeDiskStatusInner(self, instance, snode, dev):
12187 """Compute block device status.
12189 @attention: The device has to be annotated already.
12192 if dev.dev_type in constants.LDS_DRBD:
12193 # we change the snode then (otherwise we use the one passed in)
12194 if dev.logical_id[0] == instance.primary_node:
12195 snode = dev.logical_id[1]
12196 else:
12197 snode = dev.logical_id[0]
12199 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12200 instance, dev)
12201 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12204 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12211 "iv_name": dev.iv_name,
12212 "dev_type": dev.dev_type,
12213 "logical_id": dev.logical_id,
12214 "physical_id": dev.physical_id,
12215 "pstatus": dev_pstatus,
12216 "sstatus": dev_sstatus,
12217 "children": dev_children,
12222 def Exec(self, feedback_fn):
12223 """Gather and return data"""
12224 result = {}
12226 cluster = self.cfg.GetClusterInfo()
12228 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12229 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12231 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12232 for node in nodes.values()))
12234 group2name_fn = lambda uuid: groups[uuid].name
12236 for instance in self.wanted_instances:
12237 pnode = nodes[instance.primary_node]
12239 if self.op.static or pnode.offline:
12240 remote_state = None
12241 if pnode.offline:
12242 self.LogWarning("Primary node %s is marked offline, returning static"
12243 " information only for instance %s" %
12244 (pnode.name, instance.name))
12245 else:
12246 remote_info = self.rpc.call_instance_info(instance.primary_node,
12247 instance.name,
12248 instance.hypervisor)
12249 remote_info.Raise("Error checking node %s" % instance.primary_node)
12250 remote_info = remote_info.payload
12251 if remote_info and "state" in remote_info:
12252 remote_state = "up"
12253 else:
12254 if instance.admin_state == constants.ADMINST_UP:
12255 remote_state = "down"
12256 else:
12257 remote_state = instance.admin_state
12259 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12260 instance.disks)
12262 snodes_group_uuids = [nodes[snode_name].group
12263 for snode_name in instance.secondary_nodes]
12265 result[instance.name] = {
12266 "name": instance.name,
12267 "config_state": instance.admin_state,
12268 "run_state": remote_state,
12269 "pnode": instance.primary_node,
12270 "pnode_group_uuid": pnode.group,
12271 "pnode_group_name": group2name_fn(pnode.group),
12272 "snodes": instance.secondary_nodes,
12273 "snodes_group_uuids": snodes_group_uuids,
12274 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12276 # this happens to be the same format used for hooks
12277 "nics": _NICListToTuple(self, instance.nics),
12278 "disk_template": instance.disk_template,
12280 "hypervisor": instance.hypervisor,
12281 "network_port": instance.network_port,
12282 "hv_instance": instance.hvparams,
12283 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12284 "be_instance": instance.beparams,
12285 "be_actual": cluster.FillBE(instance),
12286 "os_instance": instance.osparams,
12287 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12288 "serial_no": instance.serial_no,
12289 "mtime": instance.mtime,
12290 "ctime": instance.ctime,
12291 "uuid": instance.uuid,
12292 }
12294 return result
12297 def PrepareContainerMods(mods, private_fn):
12298 """Prepares a list of container modifications by adding a private data field.
12300 @type mods: list of tuples; (operation, index, parameters)
12301 @param mods: List of modifications
12302 @type private_fn: callable or None
12303 @param private_fn: Callable for constructing a private data field for a
12308 if private_fn is None:
12309 fn = lambda: None
12310 else:
12311 fn = private_fn
12313 return [(op, idx, params, fn()) for (op, idx, params) in mods]
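# Minimal usage sketch (hypothetical parameters):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# With a private_fn such as _InstNicModPrivate the fourth element is a fresh
# private data object per modification.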
12316 #: Type description for changes as returned by L{ApplyContainerMods}'s
12318 _TApplyContModsCbChanges = \
12319 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12320 ht.TNonEmptyString,
12325 def ApplyContainerMods(kind, container, chgdesc, mods,
12326 create_fn, modify_fn, remove_fn):
12327 """Applies descriptions in C{mods} to C{container}.
12330 @param kind: One-word item description
12331 @type container: list
12332 @param container: Container to modify
12333 @type chgdesc: None or list
12334 @param chgdesc: List of applied changes
12336 @param mods: Modifications as returned by L{PrepareContainerMods}
12337 @type create_fn: callable
12338 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12339 receives absolute item index, parameters and private data object as added
12340 by L{PrepareContainerMods}, returns tuple containing new item and changes
12342 @type modify_fn: callable
12343 @param modify_fn: Callback for modifying an existing item
12344 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12345 and private data object as added by L{PrepareContainerMods}, returns
12347 @type remove_fn: callable
12348 @param remove_fn: Callback on removing item; receives absolute item index,
12349 item and private data object as added by L{PrepareContainerMods}
12352 for (op, idx, params, private) in mods:
12355 absidx = len(container) - 1
12357 raise IndexError("Not accepting negative indices other than -1")
12358 elif idx > len(container):
12359 raise IndexError("Got %s index %s, but there are only %s" %
12360 (kind, idx, len(container)))
12366 if op == constants.DDM_ADD:
12367 # Calculate where item will be added
12369 addidx = len(container)
12373 if create_fn is None:
12376 (item, changes) = create_fn(addidx, params, private)
12379 container.append(item)
12382 assert idx <= len(container)
12383 # list.insert does so before the specified index
12384 container.insert(idx, item)
12386 # Retrieve existing item
12388 item = container[absidx]
12390 raise IndexError("Invalid %s index %s" % (kind, idx))
12392 if op == constants.DDM_REMOVE:
12395 if remove_fn is not None:
12396 remove_fn(absidx, item, private)
12398 #TODO: include a hotplugged msg in changes
12399 changes = [("%s/%s" % (kind, absidx), "remove")]
12401 assert container[absidx] == item
12402 del container[absidx]
12403 elif op == constants.DDM_MODIFY:
12404 if modify_fn is not None:
12405 #TODO: include a hotplugged msg in changes
12406 changes = modify_fn(absidx, item, params, private)
12409 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12411 assert _TApplyContModsCbChanges(changes)
12413 if not (chgdesc is None or changes is None):
12414 chgdesc.extend(changes)
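# Illustrative call (hypothetical callback names): applying prepared NIC
# modifications to an instance and collecting a change description:
#   chgdesc = []
#   ApplyContainerMods("nic", instance.nics, chgdesc, nicmod,
#                      create_nic_fn, modify_nic_fn, None)
# afterwards chgdesc contains (name, value) pairs such as
#   [("nic.0/mode", "bridged"), ("nic/1", "remove")]
# matching the _TApplyContModsCbChanges shape above.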
12417 def _UpdateIvNames(base_index, disks):
12418 """Updates the C{iv_name} attribute of disks.
12420 @type disks: list of L{objects.Disk}
12423 for (idx, disk) in enumerate(disks):
12424 disk.iv_name = "disk/%s" % (base_index + idx, )
12427 class _InstNicModPrivate:
12428 """Data structure for network interface modifications.
12430 Used by L{LUInstanceSetParams}.
12433 def __init__(self):
12438 class LUInstanceSetParams(LogicalUnit):
12439 """Modifies an instances's parameters.
12442 HPATH = "instance-modify"
12443 HTYPE = constants.HTYPE_INSTANCE
12447 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12448 assert ht.TList(mods)
12449 assert not mods or len(mods[0]) in (2, 3)
12451 if mods and len(mods[0]) == 2:
12455 for op, params in mods:
12456 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12457 result.append((op, -1, params))
12461 raise errors.OpPrereqError("Only one %s add or remove operation is"
12462 " supported at a time" % kind,
12463 errors.ECODE_INVAL)
12465 result.append((constants.DDM_MODIFY, op, params))
12467 assert verify_fn(result)
12474 def _CheckMods(kind, mods, key_types, item_fn):
12475 """Ensures requested disk/NIC modifications are valid.
12478 for (op, _, params) in mods:
12479 assert ht.TDict(params)
12481 # If key_types is an empty dict, we assume we have an 'ext' template
12482 # and thus do not ForceDictType
12483 if key_types:
12484 utils.ForceDictType(params, key_types)
12486 if op == constants.DDM_REMOVE:
12488 raise errors.OpPrereqError("No settings should be passed when"
12489 " removing a %s" % kind,
12490 errors.ECODE_INVAL)
12491 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12492 item_fn(op, params)
12494 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12497 def _VerifyDiskModification(op, params):
12498 """Verifies a disk modification.
12501 if op == constants.DDM_ADD:
12502 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12503 if mode not in constants.DISK_ACCESS_SET:
12504 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12505 errors.ECODE_INVAL)
12507 size = params.get(constants.IDISK_SIZE, None)
12508 if size is None:
12509 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12510 constants.IDISK_SIZE, errors.ECODE_INVAL)
12512 try:
12513 size = int(size)
12514 except (TypeError, ValueError), err:
12515 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12516 errors.ECODE_INVAL)
12518 params[constants.IDISK_SIZE] = size
12520 elif op == constants.DDM_MODIFY:
12521 if constants.IDISK_SIZE in params:
12522 raise errors.OpPrereqError("Disk size change not possible, use"
12523 " grow-disk", errors.ECODE_INVAL)
12524 if constants.IDISK_MODE not in params:
12525 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
12526 " modification supported, but missing",
12527 errors.ECODE_NOENT)
12528 if len(params) > 1:
12529 raise errors.OpPrereqError("Disk modification doesn't support"
12530 " additional arbitrary parameters",
12531 errors.ECODE_INVAL)
12534 def _VerifyNicModification(op, params):
12535 """Verifies a network interface modification.
12538 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12539 ip = params.get(constants.INIC_IP, None)
12540 req_net = params.get(constants.INIC_NETWORK, None)
12541 link = params.get(constants.NIC_LINK, None)
12542 mode = params.get(constants.NIC_MODE, None)
12543 if req_net is not None:
12544 if req_net.lower() == constants.VALUE_NONE:
12545 params[constants.INIC_NETWORK] = None
12547 elif link is not None or mode is not None:
12548 raise errors.OpPrereqError("If a network is given,"
12549 " mode or link should not be given",
12550 errors.ECODE_INVAL)
12552 if op == constants.DDM_ADD:
12553 macaddr = params.get(constants.INIC_MAC, None)
12554 if macaddr is None:
12555 params[constants.INIC_MAC] = constants.VALUE_AUTO
12558 if ip.lower() == constants.VALUE_NONE:
12559 params[constants.INIC_IP] = None
12561 if ip.lower() == constants.NIC_IP_POOL:
12562 if op == constants.DDM_ADD and req_net is None:
12563 raise errors.OpPrereqError("If ip=pool, parameter network"
12564 " must be passed too",
12565 errors.ECODE_INVAL)
12567 if not netutils.IPAddress.IsValid(ip):
12568 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12569 errors.ECODE_INVAL)
12571 if constants.INIC_MAC in params:
12572 macaddr = params[constants.INIC_MAC]
12573 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12574 macaddr = utils.NormalizeAndValidateMac(macaddr)
12576 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12577 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12578 " modifying an existing NIC",
12579 errors.ECODE_INVAL)
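# Examples of modifications accepted by the checks above (hypothetical values):
#   (constants.DDM_ADD,    -1, {"ip": "pool", "network": "net1"})
#   (constants.DDM_MODIFY,  0, {"link": "br0", "mode": "bridged"})
# whereas ip="pool" on an add without a network, or a network combined with an
# explicit link/mode, is rejected.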
12581 def CheckArguments(self):
12582 if not (self.op.nics or self.op.disks or self.op.disk_template or
12583 self.op.hvparams or self.op.beparams or self.op.os_name or
12584 self.op.offline is not None or self.op.runtime_mem):
12585 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12587 if self.op.hvparams:
12588 _CheckGlobalHvParams(self.op.hvparams)
12590 if self.op.allow_arbit_params:
12592 self._UpgradeDiskNicMods("disk", self.op.disks,
12593 opcodes.OpInstanceSetParams.TestExtDiskModifications)
12596 self._UpgradeDiskNicMods("disk", self.op.disks,
12597 opcodes.OpInstanceSetParams.TestDiskModifications)
12600 self._UpgradeDiskNicMods("NIC", self.op.nics,
12601 opcodes.OpInstanceSetParams.TestNicModifications)
12603 # Check disk modifications
12604 if self.op.allow_arbit_params:
12605 self._CheckMods("disk", self.op.disks, {},
12606 self._VerifyDiskModification)
12607 else:
12608 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12609 self._VerifyDiskModification)
12611 if self.op.disks and self.op.disk_template is not None:
12612 raise errors.OpPrereqError("Disk template conversion and other disk"
12613 " changes not supported at the same time",
12614 errors.ECODE_INVAL)
12616 if (self.op.disk_template and
12617 self.op.disk_template in constants.DTS_INT_MIRROR and
12618 self.op.remote_node is None):
12619 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12620 " one requires specifying a secondary node",
12621 errors.ECODE_INVAL)
12623 # Check NIC modifications
12624 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12625 self._VerifyNicModification)
12627 def ExpandNames(self):
12628 self._ExpandAndLockInstance()
12629 # Can't even acquire node locks in shared mode as upcoming changes in
12630 # Ganeti 2.6 will start to modify the node object on disk conversion
12631 self.needed_locks[locking.LEVEL_NODE] = []
12632 self.needed_locks[locking.LEVEL_NODE_RES] = []
12633 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12635 def DeclareLocks(self, level):
12636 # TODO: Acquire group lock in shared mode (disk parameters)
12637 if level == locking.LEVEL_NODE:
12638 self._LockInstancesNodes()
12639 if self.op.disk_template and self.op.remote_node:
12640 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12641 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12642 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12644 self.needed_locks[locking.LEVEL_NODE_RES] = \
12645 self.needed_locks[locking.LEVEL_NODE][:]
12647 def BuildHooksEnv(self):
12648 """Build hooks env.
12650 This runs on the master, primary and secondaries.
12653 args = {}
12654 if constants.BE_MINMEM in self.be_new:
12655 args["minmem"] = self.be_new[constants.BE_MINMEM]
12656 if constants.BE_MAXMEM in self.be_new:
12657 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12658 if constants.BE_VCPUS in self.be_new:
12659 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12660 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12661 # information at all.
12663 if self._new_nics is not None:
12665 nics = []
12666 for nic in self._new_nics:
12667 n = copy.deepcopy(nic)
12668 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12669 n.nicparams = nicparams
12670 nics.append(_NICToTuple(self, n))
12672 args["nics"] = nics
12674 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12675 if self.op.disk_template:
12676 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12677 if self.op.runtime_mem:
12678 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12680 return env
12682 def BuildHooksNodes(self):
12683 """Build hooks nodes.
12686 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12687 return (nl, nl)
12689 def _PrepareNicModification(self, params, private, old_ip, old_net,
12690 old_params, cluster, pnode):
12692 update_params_dict = dict([(key, params[key])
12693 for key in constants.NICS_PARAMETERS
12694 if key in params])
12696 req_link = update_params_dict.get(constants.NIC_LINK, None)
12697 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12699 new_net = params.get(constants.INIC_NETWORK, old_net)
12700 if new_net is not None:
12701 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12702 if netparams is None:
12703 raise errors.OpPrereqError("No netparams found for the network"
12704 " %s, probably not connected." % new_net,
12705 errors.ECODE_INVAL)
12706 new_params = dict(netparams)
12708 new_params = _GetUpdatedParams(old_params, update_params_dict)
12710 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12712 new_filled_params = cluster.SimpleFillNIC(new_params)
12713 objects.NIC.CheckParameterSyntax(new_filled_params)
12715 new_mode = new_filled_params[constants.NIC_MODE]
12716 if new_mode == constants.NIC_MODE_BRIDGED:
12717 bridge = new_filled_params[constants.NIC_LINK]
12718 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12720 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12722 self.warn.append(msg)
12724 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12726 elif new_mode == constants.NIC_MODE_ROUTED:
12727 ip = params.get(constants.INIC_IP, old_ip)
12728 if ip is None:
12729 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12730 " on a routed NIC", errors.ECODE_INVAL)
12732 if constants.INIC_MAC in params:
12733 mac = params[constants.INIC_MAC]
12735 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12736 errors.ECODE_INVAL)
12737 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12738 # otherwise generate the MAC address
12739 params[constants.INIC_MAC] = \
12740 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12742 # or validate/reserve the current one
12744 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12745 except errors.ReservationError:
12746 raise errors.OpPrereqError("MAC address '%s' already in use"
12747 " in cluster" % mac,
12748 errors.ECODE_NOTUNIQUE)
12749 elif new_net != old_net:
12750 def get_net_prefix(net):
12752 uuid = self.cfg.LookupNetwork(net)
12754 nobj = self.cfg.GetNetwork(uuid)
12755 return nobj.mac_prefix
12757 new_prefix = get_net_prefix(new_net)
12758 old_prefix = get_net_prefix(old_net)
12759 if old_prefix != new_prefix:
12760 params[constants.INIC_MAC] = \
12761 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12763 #if there is a change in nic-network configuration
12764 new_ip = params.get(constants.INIC_IP, old_ip)
12765 if (new_ip, new_net) != (old_ip, old_net):
12768 if new_ip.lower() == constants.NIC_IP_POOL:
12770 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12771 except errors.ReservationError:
12772 raise errors.OpPrereqError("Unable to get a free IP"
12773 " from the address pool",
12774 errors.ECODE_STATE)
12775 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12776 params[constants.INIC_IP] = new_ip
12777 elif new_ip != old_ip or new_net != old_net:
12779 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12780 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12781 except errors.ReservationError:
12782 raise errors.OpPrereqError("IP %s not available in network %s" %
12784 errors.ECODE_NOTUNIQUE)
12785 elif new_ip.lower() == constants.NIC_IP_POOL:
12786 raise errors.OpPrereqError("ip=pool, but no network found",
12790 if self.op.conflicts_check:
12791 _CheckForConflictingIp(self, new_ip, pnode)
12796 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12797 except errors.AddressPoolError:
12798 logging.warning("Release IP %s not contained in network %s",
12801 # there are no changes in (net, ip) tuple
12802 elif (old_net is not None and
12803 (req_link is not None or req_mode is not None)):
12804 raise errors.OpPrereqError("Not allowed to change link or mode of"
12805 " a NIC that is connected to a network.",
12806 errors.ECODE_INVAL)
12808 logging.info("new_params %s", new_params)
12809 logging.info("new_filled_params %s", new_filled_params)
12810 private.params = new_params
12811 private.filled = new_filled_params
12813 def CheckPrereq(self):
12814 """Check prerequisites.
12816 This only checks the instance list against the existing names.
12819 # checking the new params on the primary/secondary nodes
12821 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12822 cluster = self.cluster = self.cfg.GetClusterInfo()
12823 assert self.instance is not None, \
12824 "Cannot retrieve locked instance %s" % self.op.instance_name
12825 pnode = instance.primary_node
12826 nodelist = list(instance.all_nodes)
12827 pnode_info = self.cfg.GetNodeInfo(pnode)
12828 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12830 # Prepare disk/NIC modifications
12831 self.diskmod = PrepareContainerMods(self.op.disks, None)
12832 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12833 logging.info("nicmod %s", self.nicmod)
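# Each entry in self.diskmod/self.nicmod is roughly an (op, index, params)
# tuple as produced by PrepareContainerMods; mod[0] is the DDM_* operation
# and mod[2] the parameter dict inspected below.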
12835 # Check the validity of the `provider' parameter
12836 if instance.disk_template in constants.DT_EXT:
12837 for mod in self.diskmod:
12838 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12839 if mod[0] == constants.DDM_ADD:
12840 if ext_provider is None:
12841 raise errors.OpPrereqError("Instance template is '%s' and parameter"
12842 " '%s' missing, during disk add" %
12843 (instance.disk_template,
12844 constants.IDISK_PROVIDER),
12845 errors.ECODE_NOENT)
12846 elif mod[0] == constants.DDM_MODIFY:
12847 if ext_provider:
12848 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
12849 " modification" % constants.IDISK_PROVIDER,
12850 errors.ECODE_INVAL)
12851 else:
12852 for mod in self.diskmod:
12853 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12854 if ext_provider is not None:
12855 raise errors.OpPrereqError("Parameter '%s' is only valid for instances"
12856 " of type '%s'" % (constants.IDISK_PROVIDER,
12857 constants.DT_EXT), errors.ECODE_INVAL)
12860 if self.op.os_name and not self.op.force:
12861 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12862 self.op.force_variant)
12863 instance_os = self.op.os_name
12864 else:
12865 instance_os = instance.os
12867 assert not (self.op.disk_template and self.op.disks), \
12868 "Can't modify disk template and apply disk changes at the same time"
12870 if self.op.disk_template:
12871 if instance.disk_template == self.op.disk_template:
12872 raise errors.OpPrereqError("Instance already has disk template %s" %
12873 instance.disk_template, errors.ECODE_INVAL)
12875 if (instance.disk_template,
12876 self.op.disk_template) not in self._DISK_CONVERSIONS:
12877 raise errors.OpPrereqError("Unsupported disk template conversion from"
12878 " %s to %s" % (instance.disk_template,
12879 self.op.disk_template),
12880 errors.ECODE_INVAL)
12881 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12882 msg="cannot change disk template")
12883 if self.op.disk_template in constants.DTS_INT_MIRROR:
12884 if self.op.remote_node == pnode:
12885 raise errors.OpPrereqError("Given new secondary node %s is the same"
12886 " as the primary node of the instance" %
12887 self.op.remote_node, errors.ECODE_STATE)
12888 _CheckNodeOnline(self, self.op.remote_node)
12889 _CheckNodeNotDrained(self, self.op.remote_node)
12890 # FIXME: here we assume that the old instance type is DT_PLAIN
12891 assert instance.disk_template == constants.DT_PLAIN
12892 disks = [{constants.IDISK_SIZE: d.size,
12893 constants.IDISK_VG: d.logical_id[0]}
12894 for d in instance.disks]
12895 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12896 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12898 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12899 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12900 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12901 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12902 ignore=self.op.ignore_ipolicy)
12903 if pnode_info.group != snode_info.group:
12904 self.LogWarning("The primary and secondary nodes are in two"
12905 " different node groups; the disk parameters"
12906 " from the first disk's node group will be"
12907 " used")
12909 # hvparams processing
12910 if self.op.hvparams:
12911 hv_type = instance.hypervisor
12912 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12913 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12914 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12917 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12918 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12919 self.hv_proposed = self.hv_new = hv_new # the new actual values
12920 self.hv_inst = i_hvdict # the new dict (without defaults)
12921 else:
12922 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12923 instance.hvparams)
12924 self.hv_new = self.hv_inst = {}
12926 # beparams processing
12927 if self.op.beparams:
12928 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12930 objects.UpgradeBeParams(i_bedict)
12931 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12932 be_new = cluster.SimpleFillBE(i_bedict)
12933 self.be_proposed = self.be_new = be_new # the new actual values
12934 self.be_inst = i_bedict # the new dict (without defaults)
12935 else:
12936 self.be_new = self.be_inst = {}
12937 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12938 be_old = cluster.FillBE(instance)
12940 # CPU param validation -- checking every time a parameter is
12941 # changed to cover all cases where either CPU mask or vcpus have
12942 # changed
12943 if (constants.BE_VCPUS in self.be_proposed and
12944 constants.HV_CPU_MASK in self.hv_proposed):
12945 cpu_list = \
12946 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12947 # Verify mask is consistent with number of vCPUs. Can skip this
12948 # test if only 1 entry in the CPU mask, which means same mask
12949 # is applied to all vCPUs.
12950 if (len(cpu_list) > 1 and
12951 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12952 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12954 (self.be_proposed[constants.BE_VCPUS],
12955 self.hv_proposed[constants.HV_CPU_MASK]),
12956 errors.ECODE_INVAL)
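# Illustrative example (not taken from the code above): with vcpus=3 a
# multi-CPU mask such as "0-1:2:all" has one entry per vCPU and would pass
# this check, while a two-entry mask like "0-1:2" would not.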
12958 # Only perform this test if a new CPU mask is given
12959 if constants.HV_CPU_MASK in self.hv_new:
12960 # Calculate the largest CPU number requested
12961 max_requested_cpu = max(map(max, cpu_list))
12962 # Check that all of the instance's nodes have enough physical CPUs to
12963 # satisfy the requested CPU mask
12964 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12965 max_requested_cpu + 1, instance.hypervisor)
12967 # osparams processing
12968 if self.op.osparams:
12969 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12970 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12971 self.os_inst = i_osdict # the new dict (without defaults)
12972 else:
12973 self.os_inst = {}
12975 self.warn = []
12977 #TODO(dynmem): do the appropriate check involving MINMEM
12978 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12979 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12980 mem_check_list = [pnode]
12981 if be_new[constants.BE_AUTO_BALANCE]:
12982 # either we changed auto_balance to yes or it was from before
12983 mem_check_list.extend(instance.secondary_nodes)
12984 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12985 instance.hypervisor)
12986 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12987 [instance.hypervisor])
12988 pninfo = nodeinfo[pnode]
12989 msg = pninfo.fail_msg
12990 if msg:
12991 # Assume the primary node is unreachable and go ahead
12992 self.warn.append("Can't get info from primary node %s: %s" %
12993 (pnode, msg))
12994 else:
12995 (_, _, (pnhvinfo, )) = pninfo.payload
12996 if not isinstance(pnhvinfo.get("memory_free", None), int):
12997 self.warn.append("Node data from primary node %s doesn't contain"
12998 " free memory information" % pnode)
12999 elif instance_info.fail_msg:
13000 self.warn.append("Can't get instance runtime information: %s" %
13001 instance_info.fail_msg)
13002 else:
13003 if instance_info.payload:
13004 current_mem = int(instance_info.payload["memory"])
13005 else:
13006 # Assume instance not running
13007 # (there is a slight race condition here, but it's not very
13008 # probable, and we have no other way to check)
13009 # TODO: Describe race condition
13010 current_mem = 0
13011 #TODO(dynmem): do the appropriate check involving MINMEM
13012 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13013 pnhvinfo["memory_free"])
13014 if miss_mem > 0:
13015 raise errors.OpPrereqError("This change will prevent the instance"
13016 " from starting, due to %d MB of memory"
13017 " missing on its primary node" %
13018 miss_mem,
13019 errors.ECODE_NORES)
13021 if be_new[constants.BE_AUTO_BALANCE]:
13022 for node, nres in nodeinfo.items():
13023 if node not in instance.secondary_nodes:
13024 continue
13025 nres.Raise("Can't get info from secondary node %s" % node,
13026 prereq=True, ecode=errors.ECODE_STATE)
13027 (_, _, (nhvinfo, )) = nres.payload
13028 if not isinstance(nhvinfo.get("memory_free", None), int):
13029 raise errors.OpPrereqError("Secondary node %s didn't return free"
13030 " memory information" % node,
13031 errors.ECODE_STATE)
13032 #TODO(dynmem): do the appropriate check involving MINMEM
13033 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13034 raise errors.OpPrereqError("This change will prevent the instance"
13035 " from failover to its secondary node"
13036 " %s, due to not enough memory" % node,
13037 errors.ECODE_STATE)
13039 if self.op.runtime_mem:
13040 remote_info = self.rpc.call_instance_info(instance.primary_node,
13041 instance.name,
13042 instance.hypervisor)
13043 remote_info.Raise("Error checking node %s" % instance.primary_node)
13044 if not remote_info.payload: # not running already
13045 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
13046 errors.ECODE_STATE)
13048 current_memory = remote_info.payload["memory"]
13049 if (not self.op.force and
13050 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13051 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13052 raise errors.OpPrereqError("Instance %s must have memory between %d"
13053 " and %d MB of memory unless --force is"
13054 " given" % (instance.name,
13055 self.be_proposed[constants.BE_MINMEM],
13056 self.be_proposed[constants.BE_MAXMEM]),
13057 errors.ECODE_INVAL)
13059 if self.op.runtime_mem > current_memory:
13060 _CheckNodeFreeMemory(self, instance.primary_node,
13061 "ballooning memory for instance %s" %
13062 instance.name,
13063 self.op.runtime_mem - current_memory,
13064 instance.hypervisor)
13066 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13067 raise errors.OpPrereqError("Disk operations not supported for"
13068 " diskless instances",
13069 errors.ECODE_INVAL)
13071 def _PrepareNicCreate(_, params, private):
13072 self._PrepareNicModification(params, private, None, None,
13073 {}, cluster, pnode)
13074 return (None, None)
13076 def _PrepareNicMod(_, nic, params, private):
13077 self._PrepareNicModification(params, private, nic.ip, nic.network,
13078 nic.nicparams, cluster, pnode)
13081 def _PrepareNicRemove(_, params, private):
13082 ip = params.ip
13083 net = params.network
13084 if net is not None and ip is not None:
13085 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13087 # Verify NIC changes (operating on copy)
13088 nics = instance.nics[:]
13089 ApplyContainerMods("NIC", nics, None, self.nicmod,
13090 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13091 if len(nics) > constants.MAX_NICS:
13092 raise errors.OpPrereqError("Instance has too many network interfaces"
13093 " (%d), cannot add more" % constants.MAX_NICS,
13094 errors.ECODE_STATE)
13097 # Verify disk changes (operating on a copy)
13098 disks = instance.disks[:]
13099 ApplyContainerMods("disk", disks, None, self.diskmod,
13101 if len(disks) > constants.MAX_DISKS:
13102 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13103 " more" % constants.MAX_DISKS,
13104 errors.ECODE_STATE)
13106 if self.op.offline is not None:
13107 if self.op.offline:
13108 msg = "can't change to offline"
13109 else:
13110 msg = "can't change to online"
13111 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13113 # Pre-compute NIC changes (necessary to use result in hooks)
13114 self._nic_chgdesc = []
13115 if self.nicmod:
13116 # Operate on copies as this is still in prereq
13117 nics = [nic.Copy() for nic in instance.nics]
13118 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13119 self._CreateNewNic, self._ApplyNicMods,
13120 self._RemoveNic)
13121 self._new_nics = nics
13122 else:
13123 self._new_nics = None
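# A value of None for self._new_nics signals to Exec() that no NIC changes
# were requested, so instance.nics is left untouched there.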
13126 def _ConvertPlainToDrbd(self, feedback_fn):
13127 """Converts an instance from plain to drbd.
13130 feedback_fn("Converting template to drbd")
13131 instance = self.instance
13132 pnode = instance.primary_node
13133 snode = self.op.remote_node
13135 assert instance.disk_template == constants.DT_PLAIN
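# Conversion outline: create the new DRBD data/meta volumes, rename the
# existing LVs so they become the DRBD data children, assemble the DRBD
# devices on top, then update the configuration and wait for sync.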
13137 # create a fake disk info for _GenerateDiskTemplate
13138 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13139 constants.IDISK_VG: d.logical_id[0]}
13140 for d in instance.disks]
13141 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13142 instance.name, pnode, [snode],
13143 disk_info, None, None, 0, feedback_fn,
13145 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13147 info = _GetInstanceInfoText(instance)
13148 feedback_fn("Creating additional volumes...")
13149 # first, create the missing data and meta devices
13150 for disk in anno_disks:
13151 # unfortunately this is... not too nice
13152 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13154 for child in disk.children:
13155 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13156 # at this stage, all new LVs have been created, we can rename the
13158 feedback_fn("Renaming original volumes...")
13159 rename_list = [(o, n.children[0].logical_id)
13160 for (o, n) in zip(instance.disks, new_disks)]
13161 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13162 result.Raise("Failed to rename original LVs")
13164 feedback_fn("Initializing DRBD devices...")
13165 # all child devices are in place, we can now create the DRBD devices
13166 for disk in anno_disks:
13167 for node in [pnode, snode]:
13168 f_create = node == pnode
13169 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13171 # at this point, the instance has been modified
13172 instance.disk_template = constants.DT_DRBD8
13173 instance.disks = new_disks
13174 self.cfg.Update(instance, feedback_fn)
13176 # Release node locks while waiting for sync
13177 _ReleaseLocks(self, locking.LEVEL_NODE)
13179 # disks are created, waiting for sync
13180 disk_abort = not _WaitForSync(self, instance,
13181 oneshot=not self.op.wait_for_sync)
13182 if disk_abort:
13183 raise errors.OpExecError("There are some degraded disks for"
13184 " this instance, please cleanup manually")
13186 # Node resource locks will be released by caller
13188 def _ConvertDrbdToPlain(self, feedback_fn):
13189 """Converts an instance from drbd to plain.
13192 instance = self.instance
13194 assert len(instance.secondary_nodes) == 1
13195 assert instance.disk_template == constants.DT_DRBD8
13197 pnode = instance.primary_node
13198 snode = instance.secondary_nodes[0]
13199 feedback_fn("Converting template to plain")
13201 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13202 new_disks = [d.children[0] for d in instance.disks]
13204 # copy over size and mode
13205 for parent, child in zip(old_disks, new_disks):
13206 child.size = parent.size
13207 child.mode = parent.mode
13209 # this is a DRBD disk, return its port to the pool
13210 # NOTE: this must be done right before the call to cfg.Update!
13211 for disk in old_disks:
13212 tcp_port = disk.logical_id[2]
13213 self.cfg.AddTcpUdpPort(tcp_port)
13215 # update instance structure
13216 instance.disks = new_disks
13217 instance.disk_template = constants.DT_PLAIN
13218 self.cfg.Update(instance, feedback_fn)
13220 # Release locks in case removing disks takes a while
13221 _ReleaseLocks(self, locking.LEVEL_NODE)
13223 feedback_fn("Removing volumes on the secondary node...")
13224 for disk in old_disks:
13225 self.cfg.SetDiskID(disk, snode)
13226 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13227 if msg:
13228 self.LogWarning("Could not remove block device %s on node %s,"
13229 " continuing anyway: %s", disk.iv_name, snode, msg)
13231 feedback_fn("Removing unneeded volumes on the primary node...")
13232 for idx, disk in enumerate(old_disks):
13233 meta = disk.children[1]
13234 self.cfg.SetDiskID(meta, pnode)
13235 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13236 if msg:
13237 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13238 " continuing anyway: %s", idx, pnode, msg)
13240 def _CreateNewDisk(self, idx, params, _):
13241 """Creates a new disk.
13244 instance = self.instance
13247 if instance.disk_template in constants.DTS_FILEBASED:
13248 (file_driver, file_path) = instance.disks[0].logical_id
13249 file_path = os.path.dirname(file_path)
13250 else:
13251 file_driver = file_path = None
13253 disk = \
13254 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13255 instance.primary_node, instance.secondary_nodes,
13256 [params], file_path, file_driver, idx,
13257 self.Log, self.diskparams)[0]
13259 info = _GetInstanceInfoText(instance)
13261 logging.info("Creating volume %s for instance %s",
13262 disk.iv_name, instance.name)
13263 # Note: this needs to be kept in sync with _CreateDisks
13265 for node in instance.all_nodes:
13266 f_create = (node == instance.primary_node)
13267 try:
13268 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13269 except errors.OpExecError, err:
13270 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13271 disk.iv_name, disk, node, err)
13273 if self.op.hotplug and disk.pci:
13274 disk_ok, device_info = _AssembleInstanceDisks(self, self.instance,
13275 [disk], check=False)
13276 _, _, dev_path = device_info[0]
13277 result = self.rpc.call_hot_add_disk(self.instance.primary_node,
13278 self.instance, disk, dev_path, idx)
13280 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13284 def _ModifyDisk(idx, disk, params, _):
13285 """Modifies a disk.
13287 """
13288 disk.mode = params[constants.IDISK_MODE]
13290 return [
13291 ("disk.mode/%d" % idx, disk.mode),
13292 ]
13294 def _RemoveDisk(self, idx, root, _):
13298 #TODO: log warning in case hotplug is not possible
13300 if root.pci and not self.op.hotplug:
13301 raise errors.OpPrereqError("Cannot remove a disk that has"
13303 " without removing it with hotplug",
13304 errors.ECODE_INVAL)
13305 if self.op.hotplug and root.pci:
13306 self.rpc.call_hot_del_disk(self.instance.primary_node,
13307 self.instance, root, idx)
13308 _ShutdownInstanceDisks(self, self.instance, [root])
13309 self.cfg.UpdatePCIInfo(self.instance.name, root.pci)
13311 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13312 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13313 self.cfg.SetDiskID(disk, node)
13314 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13316 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13317 " continuing anyway", idx, node, msg)
13319 # if this is a DRBD disk, return its port to the pool
13320 if root.dev_type in constants.LDS_DRBD:
13321 self.cfg.AddTcpUdpPort(root.logical_id[2])
13323 def _CreateNewNic(self, idx, params, private):
13324 """Creates data structure for a new network interface.
13327 mac = params[constants.INIC_MAC]
13328 ip = params.get(constants.INIC_IP, None)
13329 network = params.get(constants.INIC_NETWORK, None)
13330 #TODO: not private.filled?? can a nic have no nicparams??
13331 nicparams = private.filled
13333 nic = objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams)
13335 #TODO: log warning in case hotplug is not possible
13338 if self.op.hotplug:
13339 nic_idx, pci = _GetPCIInfo(self, 'nics')
13342 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
13343 self.instance, nic, idx)
13346 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13347 (mac, ip, private.filled[constants.NIC_MODE],
13348 private.filled[constants.NIC_LINK],
13353 def _ApplyNicMods(self, idx, nic, params, private):
13354 """Modifies a network interface.
13356 """
13357 changes = []
13359 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13360 if key in params:
13361 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13362 setattr(nic, key, params[key])
13364 if private.filled:
13365 nic.nicparams = private.filled
13367 for (key, val) in nic.nicparams.items():
13368 changes.append(("nic.%s/%d" % (key, idx), val))
13370 #TODO: log warning in case hotplug is not possible
13372 if self.op.hotplug and nic.pci:
13373 self.rpc.call_hot_del_nic(self.instance.primary_node,
13374 self.instance, nic, idx)
13375 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
13376 self.instance, nic, idx)
13378 return changes
13379 def _RemoveNic(self, idx, nic, private):
13380 if nic.pci and not self.op.hotplug:
13381 raise errors.OpPrereqError("Cannot remove a nic that has been hotplugged"
13382 " without removing it with hotplug",
13383 errors.ECODE_INVAL)
13384 #TODO: log warning in case hotplug is not possible
13386 if self.op.hotplug and nic.pci:
13387 self.rpc.call_hot_del_nic(self.instance.primary_node,
13388 self.instance, nic, idx)
13389 self.cfg.UpdatePCIInfo(self.instance.name, nic.pci)
13392 def Exec(self, feedback_fn):
13393 """Modifies an instance.
13395 All parameters take effect only at the next restart of the instance.
13398 # Process here the warnings from CheckPrereq, as we don't have a
13399 # feedback_fn there.
13400 # TODO: Replace with self.LogWarning
13401 for warn in self.warn:
13402 feedback_fn("WARNING: %s" % warn)
13404 assert ((self.op.disk_template is None) ^
13405 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13406 "Not owning any node resource locks"
13408 result = []
13409 instance = self.instance
13412 if self.op.runtime_mem:
13413 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13414 instance,
13415 self.op.runtime_mem)
13416 rpcres.Raise("Cannot modify instance runtime memory")
13417 result.append(("runtime_memory", self.op.runtime_mem))
13419 # Apply disk changes
13420 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13421 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13422 _UpdateIvNames(0, instance.disks)
13424 if self.op.disk_template:
13425 if __debug__:
13426 check_nodes = set(instance.all_nodes)
13427 if self.op.remote_node:
13428 check_nodes.add(self.op.remote_node)
13429 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13430 owned = self.owned_locks(level)
13431 assert not (check_nodes - owned), \
13432 ("Not owning the correct locks, owning %r, expected at least %r" %
13433 (owned, check_nodes))
13435 r_shut = _ShutdownInstanceDisks(self, instance)
13436 if not r_shut:
13437 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13438 " proceed with disk template conversion")
13439 mode = (instance.disk_template, self.op.disk_template)
13440 try:
13441 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13442 except:
13443 self.cfg.ReleaseDRBDMinors(instance.name)
13444 raise
13445 result.append(("disk_template", self.op.disk_template))
13447 assert instance.disk_template == self.op.disk_template, \
13448 ("Expected disk template '%s', found '%s'" %
13449 (self.op.disk_template, instance.disk_template))
13451 # Release node and resource locks if there are any (they might already have
13452 # been released during disk conversion)
13453 _ReleaseLocks(self, locking.LEVEL_NODE)
13454 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13456 # Apply NIC changes
13457 if self._new_nics is not None:
13458 instance.nics = self._new_nics
13459 result.extend(self._nic_chgdesc)
13462 if self.op.hvparams:
13463 instance.hvparams = self.hv_inst
13464 for key, val in self.op.hvparams.iteritems():
13465 result.append(("hv/%s" % key, val))
13468 if self.op.beparams:
13469 instance.beparams = self.be_inst
13470 for key, val in self.op.beparams.iteritems():
13471 result.append(("be/%s" % key, val))
13474 if self.op.os_name:
13475 instance.os = self.op.os_name
13478 if self.op.osparams:
13479 instance.osparams = self.os_inst
13480 for key, val in self.op.osparams.iteritems():
13481 result.append(("os/%s" % key, val))
13483 if self.op.offline is None:
13484 # Ignore
13485 pass
13486 elif self.op.offline:
13487 # Mark instance as offline
13488 self.cfg.MarkInstanceOffline(instance.name)
13489 result.append(("admin_state", constants.ADMINST_OFFLINE))
13490 else:
13491 # Mark instance as online, but stopped
13492 self.cfg.MarkInstanceDown(instance.name)
13493 result.append(("admin_state", constants.ADMINST_DOWN))
13495 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13497 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13498 self.owned_locks(locking.LEVEL_NODE)), \
13499 "All node locks should have been released by now"
13501 return result
13503 _DISK_CONVERSIONS = {
13504 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13505 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13506 }
13509 class LUInstanceChangeGroup(LogicalUnit):
13510 HPATH = "instance-change-group"
13511 HTYPE = constants.HTYPE_INSTANCE
13514 def ExpandNames(self):
13515 self.share_locks = _ShareAll()
13516 self.needed_locks = {
13517 locking.LEVEL_NODEGROUP: [],
13518 locking.LEVEL_NODE: [],
13519 }
13521 self._ExpandAndLockInstance()
13523 if self.op.target_groups:
13524 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13525 self.op.target_groups)
13526 else:
13527 self.req_target_uuids = None
13529 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13531 def DeclareLocks(self, level):
13532 if level == locking.LEVEL_NODEGROUP:
13533 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13535 if self.req_target_uuids:
13536 lock_groups = set(self.req_target_uuids)
13537 else:
13538 # Lock all groups used by instance optimistically; this requires going
13539 # via the node before it's locked, requiring verification later on
13540 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13541 lock_groups.update(instance_groups)
13542 else:
13543 # No target groups, need to lock all of them
13544 lock_groups = locking.ALL_SET
13546 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13548 elif level == locking.LEVEL_NODE:
13549 if self.req_target_uuids:
13550 # Lock all nodes used by instances
13551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13552 self._LockInstancesNodes()
13554 # Lock all nodes in all potential target groups
13555 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13556 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13557 member_nodes = [node_name
13558 for group in lock_groups
13559 for node_name in self.cfg.GetNodeGroup(group).members]
13560 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13561 else:
13562 # Lock all nodes as all groups are potential targets
13563 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
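# Without explicit target groups the iallocator may pick any group, so the
# nodes of every group (here: all nodes) have to be locked up front.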
13565 def CheckPrereq(self):
13566 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13567 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13568 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13570 assert (self.req_target_uuids is None or
13571 owned_groups.issuperset(self.req_target_uuids))
13572 assert owned_instances == set([self.op.instance_name])
13574 # Get instance information
13575 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13577 # Check if node groups for locked instance are still correct
13578 assert owned_nodes.issuperset(self.instance.all_nodes), \
13579 ("Instance %s's nodes changed while we kept the lock" %
13580 self.op.instance_name)
13582 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13583 owned_groups)
13585 if self.req_target_uuids:
13586 # User requested specific target groups
13587 self.target_uuids = frozenset(self.req_target_uuids)
13588 else:
13589 # All groups except those used by the instance are potential targets
13590 self.target_uuids = owned_groups - inst_groups
13592 conflicting_groups = self.target_uuids & inst_groups
13593 if conflicting_groups:
13594 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13595 " used by the instance '%s'" %
13596 (utils.CommaJoin(conflicting_groups),
13597 self.op.instance_name),
13598 errors.ECODE_INVAL)
13600 if not self.target_uuids:
13601 raise errors.OpPrereqError("There are no possible target groups",
13602 errors.ECODE_INVAL)
13604 def BuildHooksEnv(self):
13605 """Build hooks env.
13607 """
13608 assert self.target_uuids
13610 env = {
13611 "TARGET_GROUPS": " ".join(self.target_uuids),
13612 }
13614 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13616 return env
13618 def BuildHooksNodes(self):
13619 """Build hooks nodes.
13622 mn = self.cfg.GetMasterNode()
13623 return ([mn], [mn])
13625 def Exec(self, feedback_fn):
13626 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13628 assert instances == [self.op.instance_name], "Instance not locked"
13630 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13631 instances=instances, target_groups=list(self.target_uuids))
13633 ial.Run(self.op.iallocator)
13635 if not ial.success:
13636 raise errors.OpPrereqError("Can't compute solution for changing group of"
13637 " instance '%s' using iallocator '%s': %s" %
13638 (self.op.instance_name, self.op.iallocator,
13640 errors.ECODE_NORES)
13642 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13644 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13645 " instance '%s'", len(jobs), self.op.instance_name)
13647 return ResultWithJobs(jobs)
13650 class LUBackupQuery(NoHooksLU):
13651 """Query the exports list
13656 def CheckArguments(self):
13657 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13658 ["node", "export"], self.op.use_locking)
13660 def ExpandNames(self):
13661 self.expq.ExpandNames(self)
13663 def DeclareLocks(self, level):
13664 self.expq.DeclareLocks(self, level)
13666 def Exec(self, feedback_fn):
13667 result = {}
13669 for (node, expname) in self.expq.OldStyleQuery(self):
13670 if expname is None:
13671 result[node] = False
13672 else:
13673 result.setdefault(node, []).append(expname)
13675 return result
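# The old-style result maps each node name either to False (its export list
# could not be fetched) or to the list of export names found on it.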
13678 class _ExportQuery(_QueryBase):
13679 FIELDS = query.EXPORT_FIELDS
13681 #: The node name is not a unique key for this query
13682 SORT_FIELD = "node"
13684 def ExpandNames(self, lu):
13685 lu.needed_locks = {}
13687 # The following variables interact with _QueryBase._GetNames
13688 if self.names:
13689 self.wanted = _GetWantedNodes(lu, self.names)
13690 else:
13691 self.wanted = locking.ALL_SET
13693 self.do_locking = self.use_locking
13695 if self.do_locking:
13696 lu.share_locks = _ShareAll()
13697 lu.needed_locks = {
13698 locking.LEVEL_NODE: self.wanted,
13699 }
13701 def DeclareLocks(self, lu, level):
13702 pass
13704 def _GetQueryData(self, lu):
13705 """Computes the list of nodes and their attributes.
13708 # Locking is not used
13710 assert not (compat.any(lu.glm.is_owned(level)
13711 for level in locking.LEVELS
13712 if level != locking.LEVEL_CLUSTER) or
13713 self.do_locking or self.use_locking)
13715 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13717 result = []
13719 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13720 if nres.fail_msg:
13721 result.append((node, None))
13722 else:
13723 result.extend((node, expname) for expname in nres.payload)
13725 return result
13728 class LUBackupPrepare(NoHooksLU):
13729 """Prepares an instance for an export and returns useful information.
13734 def ExpandNames(self):
13735 self._ExpandAndLockInstance()
13737 def CheckPrereq(self):
13738 """Check prerequisites.
13741 instance_name = self.op.instance_name
13743 self.instance = self.cfg.GetInstanceInfo(instance_name)
13744 assert self.instance is not None, \
13745 "Cannot retrieve locked instance %s" % self.op.instance_name
13746 _CheckNodeOnline(self, self.instance.primary_node)
13748 self._cds = _GetClusterDomainSecret()
13750 def Exec(self, feedback_fn):
13751 """Prepares an instance for an export.
13754 instance = self.instance
13756 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13757 salt = utils.GenerateSecret(8)
13759 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13760 result = self.rpc.call_x509_cert_create(instance.primary_node,
13761 constants.RIE_CERT_VALIDITY)
13762 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13764 (name, cert_pem) = result.payload
13766 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13767 cert_pem)
13769 return {
13770 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13771 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13772 salt),
13773 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13774 }
13776 return None
13779 class LUBackupExport(LogicalUnit):
13780 """Export an instance to an image in the cluster.
13783 HPATH = "instance-export"
13784 HTYPE = constants.HTYPE_INSTANCE
13787 def CheckArguments(self):
13788 """Check the arguments.
13791 self.x509_key_name = self.op.x509_key_name
13792 self.dest_x509_ca_pem = self.op.destination_x509_ca
13794 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13795 if not self.x509_key_name:
13796 raise errors.OpPrereqError("Missing X509 key name for encryption",
13797 errors.ECODE_INVAL)
13799 if not self.dest_x509_ca_pem:
13800 raise errors.OpPrereqError("Missing destination X509 CA",
13801 errors.ECODE_INVAL)
13803 def ExpandNames(self):
13804 self._ExpandAndLockInstance()
13806 # Lock all nodes for local exports
13807 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13808 # FIXME: lock only instance primary and destination node
13810 # Sad but true, for now we have do lock all nodes, as we don't know where
13811 # the previous export might be, and in this LU we search for it and
13812 # remove it from its current node. In the future we could fix this by:
13813 # - making a tasklet to search (share-lock all), then create the
13814 # new one, then one to remove, after
13815 # - removing the removal operation altogether
13816 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13818 def DeclareLocks(self, level):
13819 """Last minute lock declaration."""
13820 # All nodes are locked anyway, so nothing to do here.
13822 def BuildHooksEnv(self):
13823 """Build hooks env.
13825 This will run on the master, primary node and target node.
13829 "EXPORT_MODE": self.op.mode,
13830 "EXPORT_NODE": self.op.target_node,
13831 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13832 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13833 # TODO: Generic function for boolean env variables
13834 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13835 }
13837 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13839 return env
13841 def BuildHooksNodes(self):
13842 """Build hooks nodes.
13845 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13847 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13848 nl.append(self.op.target_node)
13850 return (nl, nl)
13852 def CheckPrereq(self):
13853 """Check prerequisites.
13855 This checks that the instance and node names are valid.
13858 instance_name = self.op.instance_name
13860 self.instance = self.cfg.GetInstanceInfo(instance_name)
13861 assert self.instance is not None, \
13862 "Cannot retrieve locked instance %s" % self.op.instance_name
13863 _CheckNodeOnline(self, self.instance.primary_node)
13865 if (self.op.remove_instance and
13866 self.instance.admin_state == constants.ADMINST_UP and
13867 not self.op.shutdown):
13868 raise errors.OpPrereqError("Can not remove instance without shutting it"
13871 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13872 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13873 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13874 assert self.dst_node is not None
13876 _CheckNodeOnline(self, self.dst_node.name)
13877 _CheckNodeNotDrained(self, self.dst_node.name)
13880 self.dest_disk_info = None
13881 self.dest_x509_ca = None
13883 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13884 self.dst_node = None
13886 if len(self.op.target_node) != len(self.instance.disks):
13887 raise errors.OpPrereqError(("Received destination information for %s"
13888 " disks, but instance %s has %s disks") %
13889 (len(self.op.target_node), instance_name,
13890 len(self.instance.disks)),
13891 errors.ECODE_INVAL)
13893 cds = _GetClusterDomainSecret()
13895 # Check X509 key name
13896 try:
13897 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13898 except (TypeError, ValueError), err:
13899 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13901 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13902 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13903 errors.ECODE_INVAL)
13905 # Load and verify CA
13906 try:
13907 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13908 except OpenSSL.crypto.Error, err:
13909 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13910 (err, ), errors.ECODE_INVAL)
13912 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13913 if errcode is not None:
13914 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13915 (msg, ), errors.ECODE_INVAL)
13917 self.dest_x509_ca = cert
13919 # Verify target information
13920 disk_info = []
13921 for idx, disk_data in enumerate(self.op.target_node):
13922 try:
13923 (host, port, magic) = \
13924 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13925 except errors.GenericError, err:
13926 raise errors.OpPrereqError("Target info for disk %s: %s" %
13927 (idx, err), errors.ECODE_INVAL)
13929 disk_info.append((host, port, magic))
13931 assert len(disk_info) == len(self.op.target_node)
13932 self.dest_disk_info = disk_info
13934 else:
13935 raise errors.ProgrammerError("Unhandled export mode %r" %
13936 self.op.mode)
13938 # instance disk type verification
13939 # TODO: Implement export support for file-based disks
13940 for disk in self.instance.disks:
13941 if disk.dev_type == constants.LD_FILE:
13942 raise errors.OpPrereqError("Export not supported for instances with"
13943 " file-based disks", errors.ECODE_INVAL)
13945 def _CleanupExports(self, feedback_fn):
13946 """Removes exports of current instance from all other nodes.
13948 If an instance in a cluster with nodes A..D was exported to node C, its
13949 exports will be removed from the nodes A, B and D.
13952 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13954 nodelist = self.cfg.GetNodeList()
13955 nodelist.remove(self.dst_node.name)
13957 # on one-node clusters nodelist will be empty after the removal
13958 # if we proceed the backup would be removed because OpBackupQuery
13959 # substitutes an empty list with the full cluster node list.
13960 iname = self.instance.name
13961 if nodelist:
13962 feedback_fn("Removing old exports for instance %s" % iname)
13963 exportlist = self.rpc.call_export_list(nodelist)
13964 for node in exportlist:
13965 if exportlist[node].fail_msg:
13966 continue
13967 if iname in exportlist[node].payload:
13968 msg = self.rpc.call_export_remove(node, iname).fail_msg
13969 if msg:
13970 self.LogWarning("Could not remove older export for instance %s"
13971 " on node %s: %s", iname, node, msg)
13973 def Exec(self, feedback_fn):
13974 """Export an instance to an image in the cluster.
13977 assert self.op.mode in constants.EXPORT_MODES
13979 instance = self.instance
13980 src_node = instance.primary_node
13982 if self.op.shutdown:
13983 # shutdown the instance, but not the disks
13984 feedback_fn("Shutting down instance %s" % instance.name)
13985 result = self.rpc.call_instance_shutdown(src_node, instance,
13986 self.op.shutdown_timeout)
13987 # TODO: Maybe ignore failures if ignore_remove_failures is set
13988 result.Raise("Could not shutdown instance %s on"
13989 " node %s" % (instance.name, src_node))
13991 # set the disks ID correctly since call_instance_start needs the
13992 # correct drbd minor to create the symlinks
13993 for disk in instance.disks:
13994 self.cfg.SetDiskID(disk, src_node)
13996 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13998 if activate_disks:
13999 # Activate the instance disks if we're exporting a stopped instance
14000 feedback_fn("Activating disks for %s" % instance.name)
14001 _StartInstanceDisks(self, instance, None)
14003 try:
14004 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14005 instance)
14007 helper.CreateSnapshots()
14008 try:
14009 if (self.op.shutdown and
14010 instance.admin_state == constants.ADMINST_UP and
14011 not self.op.remove_instance):
14012 assert not activate_disks
14013 feedback_fn("Starting instance %s" % instance.name)
14014 result = self.rpc.call_instance_start(src_node,
14015 (instance, None, None), False)
14016 msg = result.fail_msg
14017 if msg:
14018 feedback_fn("Failed to start instance: %s" % msg)
14019 _ShutdownInstanceDisks(self, instance)
14020 raise errors.OpExecError("Could not start instance: %s" % msg)
14022 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14023 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14024 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14025 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14026 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14028 (key_name, _, _) = self.x509_key_name
14030 dest_ca_pem = \
14031 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14032 self.dest_x509_ca)
14034 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14035 key_name, dest_ca_pem,
14036 timeouts)
14037 finally:
14038 helper.Cleanup()
14040 # Check for backwards compatibility
14041 assert len(dresults) == len(instance.disks)
14042 assert compat.all(isinstance(i, bool) for i in dresults), \
14043 "Not all results are boolean: %r" % dresults
14045 finally:
14046 if activate_disks:
14047 feedback_fn("Deactivating disks for %s" % instance.name)
14048 _ShutdownInstanceDisks(self, instance)
14050 if not (compat.all(dresults) and fin_resu):
14051 failures = []
14052 if not fin_resu:
14053 failures.append("export finalization")
14054 if not compat.all(dresults):
14055 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14056 if not dsk)
14057 failures.append("disk export: disk(s) %s" % fdsk)
14059 raise errors.OpExecError("Export failed, errors in %s" %
14060 utils.CommaJoin(failures))
14062 # At this point, the export was successful, we can cleanup/finish
14064 # Remove instance if requested
14065 if self.op.remove_instance:
14066 feedback_fn("Removing instance %s" % instance.name)
14067 _RemoveInstance(self, feedback_fn, instance,
14068 self.op.ignore_remove_failures)
14070 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14071 self._CleanupExports(feedback_fn)
14073 return fin_resu, dresults
14076 class LUBackupRemove(NoHooksLU):
14077 """Remove exports related to the named instance.
14082 def ExpandNames(self):
14083 self.needed_locks = {}
14084 # We need all nodes to be locked in order for RemoveExport to work, but we
14085 # don't need to lock the instance itself, as nothing will happen to it (and
14086 # we can remove exports also for a removed instance)
14087 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14089 def Exec(self, feedback_fn):
14090 """Remove any export.
14092 """
14093 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14094 # If the instance was not found we'll try with the name that was passed in.
14095 # This will only work if it was an FQDN, though.
14096 fqdn_warn = False
14097 if not instance_name:
14098 fqdn_warn = True
14099 instance_name = self.op.instance_name
14101 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14102 exportlist = self.rpc.call_export_list(locked_nodes)
14103 found = False
14104 for node in exportlist:
14105 msg = exportlist[node].fail_msg
14106 if msg:
14107 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14108 continue
14109 if instance_name in exportlist[node].payload:
14110 found = True
14111 result = self.rpc.call_export_remove(node, instance_name)
14112 msg = result.fail_msg
14113 if msg:
14114 logging.error("Could not remove export for instance %s"
14115 " on node %s: %s", instance_name, node, msg)
14117 if fqdn_warn and not found:
14118 feedback_fn("Export not found. If trying to remove an export belonging"
14119 " to a deleted instance please use its Fully Qualified"
14120 " Domain Name.")
14123 class LUGroupAdd(LogicalUnit):
14124 """Logical unit for creating node groups.
14127 HPATH = "group-add"
14128 HTYPE = constants.HTYPE_GROUP
14131 def ExpandNames(self):
14132 # We need the new group's UUID here so that we can create and acquire the
14133 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14134 # that it should not check whether the UUID exists in the configuration.
14135 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14136 self.needed_locks = {}
14137 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14139 def CheckPrereq(self):
14140 """Check prerequisites.
14142 This checks that the given group name is not an existing node group
14143 already.
14145 """
14146 try:
14147 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14148 except errors.OpPrereqError:
14149 pass
14150 else:
14151 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14152 " node group (UUID: %s)" %
14153 (self.op.group_name, existing_uuid),
14154 errors.ECODE_EXISTS)
14156 if self.op.ndparams:
14157 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14159 if self.op.hv_state:
14160 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14161 else:
14162 self.new_hv_state = None
14164 if self.op.disk_state:
14165 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14166 else:
14167 self.new_disk_state = None
14169 if self.op.diskparams:
14170 for templ in constants.DISK_TEMPLATES:
14171 if templ in self.op.diskparams:
14172 utils.ForceDictType(self.op.diskparams[templ],
14173 constants.DISK_DT_TYPES)
14174 self.new_diskparams = self.op.diskparams
14175 try:
14176 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14177 except errors.OpPrereqError, err:
14178 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14179 errors.ECODE_INVAL)
14180 else:
14181 self.new_diskparams = {}
14183 if self.op.ipolicy:
14184 cluster = self.cfg.GetClusterInfo()
14185 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14186 try:
14187 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14188 except errors.ConfigurationError, err:
14189 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14190 errors.ECODE_INVAL)
14192 def BuildHooksEnv(self):
14193 """Build hooks env.
14195 """
14196 return {
14197 "GROUP_NAME": self.op.group_name,
14198 }
14200 def BuildHooksNodes(self):
14201 """Build hooks nodes.
14204 mn = self.cfg.GetMasterNode()
14205 return ([mn], [mn])
14207 def Exec(self, feedback_fn):
14208 """Add the node group to the cluster.
14211 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14212 uuid=self.group_uuid,
14213 alloc_policy=self.op.alloc_policy,
14214 ndparams=self.op.ndparams,
14215 diskparams=self.new_diskparams,
14216 ipolicy=self.op.ipolicy,
14217 hv_state_static=self.new_hv_state,
14218 disk_state_static=self.new_disk_state)
14220 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14221 del self.remove_locks[locking.LEVEL_NODEGROUP]
14224 class LUGroupAssignNodes(NoHooksLU):
14225 """Logical unit for assigning nodes to groups.
14230 def ExpandNames(self):
14231 # These raise errors.OpPrereqError on their own:
14232 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14233 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14235 # We want to lock all the affected nodes and groups. We have readily
14236 # available the list of nodes, and the *destination* group. To gather the
14237 # list of "source" groups, we need to fetch node information later on.
14238 self.needed_locks = {
14239 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14240 locking.LEVEL_NODE: self.op.nodes,
14243 def DeclareLocks(self, level):
14244 if level == locking.LEVEL_NODEGROUP:
14245 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14247 # Try to get all affected nodes' groups without having the group or node
14248 # lock yet. Needs verification later in the code flow.
14249 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14251 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14253 def CheckPrereq(self):
14254 """Check prerequisites.
14257 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14258 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14259 frozenset(self.op.nodes))
14261 expected_locks = (set([self.group_uuid]) |
14262 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14263 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14264 if actual_locks != expected_locks:
14265 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14266 " current groups are '%s', used to be '%s'" %
14267 (utils.CommaJoin(expected_locks),
14268 utils.CommaJoin(actual_locks)))
14270 self.node_data = self.cfg.GetAllNodesInfo()
14271 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14272 instance_data = self.cfg.GetAllInstancesInfo()
14274 if self.group is None:
14275 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14276 (self.op.group_name, self.group_uuid))
14278 (new_splits, previous_splits) = \
14279 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14280 for node in self.op.nodes],
14281 self.node_data, instance_data)
14283 if new_splits:
14284 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14286 if not self.op.force:
14287 raise errors.OpExecError("The following instances get split by this"
14288 " change and --force was not given: %s" %
14289 fmt_new_splits)
14290 else:
14291 self.LogWarning("This operation will split the following instances: %s",
14292 fmt_new_splits)
14294 if previous_splits:
14295 self.LogWarning("In addition, these already-split instances continue"
14296 " to be split across groups: %s",
14297 utils.CommaJoin(utils.NiceSort(previous_splits)))
14299 def Exec(self, feedback_fn):
14300 """Assign nodes to a new group.
14302 """
14303 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14305 self.cfg.AssignGroupNodes(mods)
14307 @staticmethod
14308 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14309 """Check for split instances after a node assignment.
14311 This method considers a series of node assignments as an atomic operation,
14312 and returns information about split instances after applying the set of
14315 In particular, it returns information about newly split instances, and
14316 instances that were already split, and remain so after the change.
14318 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14321 @type changes: list of (node_name, new_group_uuid) pairs.
14322 @param changes: list of node assignments to consider.
14323 @param node_data: a dict with data for all nodes
14324 @param instance_data: a dict with all instances to consider
14325 @rtype: a two-tuple
14326 @return: a list of instances that were previously okay and result split as a
14327 consequence of this change, and a list of instances that were previously
14328 split and this change does not fix.
14330 """
14331 changed_nodes = dict((node, group) for node, group in changes
14332 if node_data[node].group != group)
14334 all_split_instances = set()
14335 previously_split_instances = set()
14337 def InstanceNodes(instance):
14338 return [instance.primary_node] + list(instance.secondary_nodes)
14340 for inst in instance_data.values():
14341 if inst.disk_template not in constants.DTS_INT_MIRROR:
14342 continue
14344 instance_nodes = InstanceNodes(inst)
14346 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14347 previously_split_instances.add(inst.name)
14349 if len(set(changed_nodes.get(node, node_data[node].group)
14350 for node in instance_nodes)) > 1:
14351 all_split_instances.add(inst.name)
14353 return (list(all_split_instances - previously_split_instances),
14354 list(previously_split_instances & all_split_instances))
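# Minimal usage sketch (names below are hypothetical, not from this module):
#   changes = [("node1.example.com", "uuid-of-target-group")]
#   (new, previous) = \
#     LUGroupAssignNodes.CheckAssignmentForSplitInstances(changes,
#                                                         node_data,
#                                                         instance_data)
# "new" lists instances this assignment would newly split across groups,
# "previous" those that were already split and remain split.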
14357 class _GroupQuery(_QueryBase):
14358 FIELDS = query.GROUP_FIELDS
14360 def ExpandNames(self, lu):
14361 lu.needed_locks = {}
14363 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14364 self._cluster = lu.cfg.GetClusterInfo()
14365 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14367 if not self.names:
14368 self.wanted = [name_to_uuid[name]
14369 for name in utils.NiceSort(name_to_uuid.keys())]
14370 else:
14371 # Accept names to be either names or UUIDs.
14372 missing = []
14373 self.wanted = []
14374 all_uuid = frozenset(self._all_groups.keys())
14376 for name in self.names:
14377 if name in all_uuid:
14378 self.wanted.append(name)
14379 elif name in name_to_uuid:
14380 self.wanted.append(name_to_uuid[name])
14381 else:
14382 missing.append(name)
14384 if missing:
14385 raise errors.OpPrereqError("Some groups do not exist: %s" %
14386 utils.CommaJoin(missing),
14387 errors.ECODE_NOENT)
14389 def DeclareLocks(self, lu, level):
14390 pass
14392 def _GetQueryData(self, lu):
14393 """Computes the list of node groups and their attributes.
14396 do_nodes = query.GQ_NODE in self.requested_data
14397 do_instances = query.GQ_INST in self.requested_data
14399 group_to_nodes = None
14400 group_to_instances = None
14402 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14403 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14404 # latter GetAllInstancesInfo() is not enough, for we have to go through
14405 # instance->node. Hence, we will need to process nodes even if we only need
14406 # instance information.
14407 if do_nodes or do_instances:
14408 all_nodes = lu.cfg.GetAllNodesInfo()
14409 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14410 node_to_group = {}
14412 for node in all_nodes.values():
14413 if node.group in group_to_nodes:
14414 group_to_nodes[node.group].append(node.name)
14415 node_to_group[node.name] = node.group
14417 if do_instances:
14418 all_instances = lu.cfg.GetAllInstancesInfo()
14419 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14421 for instance in all_instances.values():
14422 node = instance.primary_node
14423 if node in node_to_group:
14424 group_to_instances[node_to_group[node]].append(instance.name)
14427 # Do not pass on node information if it was not requested.
14428 group_to_nodes = None
14430 return query.GroupQueryData(self._cluster,
14431 [self._all_groups[uuid]
14432 for uuid in self.wanted],
14433 group_to_nodes, group_to_instances,
14434 query.GQ_DISKPARAMS in self.requested_data)
14437 class LUGroupQuery(NoHooksLU):
14438 """Logical unit for querying node groups.
14443 def CheckArguments(self):
14444 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14445 self.op.output_fields, False)
14447 def ExpandNames(self):
14448 self.gq.ExpandNames(self)
14450 def DeclareLocks(self, level):
14451 self.gq.DeclareLocks(self, level)
14453 def Exec(self, feedback_fn):
14454 return self.gq.OldStyleQuery(self)
14457 class LUGroupSetParams(LogicalUnit):
14458 """Modifies the parameters of a node group.
14460 """
14461 HPATH = "group-modify"
14462 HTYPE = constants.HTYPE_GROUP
14463 REQ_BGL = False
14465 def CheckArguments(self):
14466 all_changes = [
14467 self.op.ndparams,
14468 self.op.diskparams,
14469 self.op.alloc_policy,
14470 self.op.hv_state,
14471 self.op.disk_state,
14472 self.op.ipolicy,
14473 ]
14475 if all_changes.count(None) == len(all_changes):
14476 raise errors.OpPrereqError("Please pass at least one modification",
14477 errors.ECODE_INVAL)
14479 def ExpandNames(self):
14480 # This raises errors.OpPrereqError on its own:
14481 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14483 self.needed_locks = {
14484 locking.LEVEL_INSTANCE: [],
14485 locking.LEVEL_NODEGROUP: [self.group_uuid],
14486 }
14488 self.share_locks[locking.LEVEL_INSTANCE] = 1
14490 def DeclareLocks(self, level):
14491 if level == locking.LEVEL_INSTANCE:
14492 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14494 # Lock instances optimistically, needs verification once group lock has
14495 # been acquired
14496 self.needed_locks[locking.LEVEL_INSTANCE] = \
14497 self.cfg.GetNodeGroupInstances(self.group_uuid)
14499 @staticmethod
14500 def _UpdateAndVerifyDiskParams(old, new):
14501 """Updates and verifies disk parameters.
14503 """
14504 new_params = _GetUpdatedParams(old, new)
14505 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14506 return new_params
14508 def CheckPrereq(self):
14509 """Check prerequisites.
14512 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14514 # Check if locked instances are still correct
14515 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14517 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14518 cluster = self.cfg.GetClusterInfo()
14520 if self.group is None:
14521 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14522 (self.op.group_name, self.group_uuid))
14524 if self.op.ndparams:
14525 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14526 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14527 self.new_ndparams = new_ndparams
14529 if self.op.diskparams:
14530 diskparams = self.group.diskparams
14531 uavdp = self._UpdateAndVerifyDiskParams
14532 # For each disktemplate subdict update and verify the values
14533 new_diskparams = dict((dt,
14534 uavdp(diskparams.get(dt, {}),
14535 self.op.diskparams[dt]))
14536 for dt in constants.DISK_TEMPLATES
14537 if dt in self.op.diskparams)
14538 # As we've all subdicts of diskparams ready, lets merge the actual
14539 # dict with all updated subdicts
14540 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
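      # Illustrative sketch (hypothetical values, not taken from a real
      # cluster): if the group currently has
      #   diskparams = {"drbd": {"resync-rate": 1024}}
      # and the opcode passes {"drbd": {"metavg": "othervg"}}, the merged
      # self.new_diskparams would be
      #   {"drbd": {"resync-rate": 1024, "metavg": "othervg"}}
      # with all other disk templates left untouched.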
14542 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14543 except errors.OpPrereqError, err:
14544         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14545 errors.ECODE_INVAL)
14547 if self.op.hv_state:
14548 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14549 self.group.hv_state_static)
14551 if self.op.disk_state:
14552 self.new_disk_state = \
14553 _MergeAndVerifyDiskState(self.op.disk_state,
14554 self.group.disk_state_static)
14556 if self.op.ipolicy:
14557 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14561 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14562 inst_filter = lambda inst: inst.name in owned_instances
14563 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14565 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14567 new_ipolicy, instances)
14570 self.LogWarning("After the ipolicy change the following instances"
14571                       " violate it: %s",
14572 utils.CommaJoin(violations))
14574 def BuildHooksEnv(self):
14575 """Build hooks env.
14579 "GROUP_NAME": self.op.group_name,
14580 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14583 def BuildHooksNodes(self):
14584 """Build hooks nodes.
14587 mn = self.cfg.GetMasterNode()
14588 return ([mn], [mn])
14590 def Exec(self, feedback_fn):
14591 """Modifies the node group.
14596 if self.op.ndparams:
14597 self.group.ndparams = self.new_ndparams
14598 result.append(("ndparams", str(self.group.ndparams)))
14600 if self.op.diskparams:
14601 self.group.diskparams = self.new_diskparams
14602 result.append(("diskparams", str(self.group.diskparams)))
14604 if self.op.alloc_policy:
14605 self.group.alloc_policy = self.op.alloc_policy
14607 if self.op.hv_state:
14608 self.group.hv_state_static = self.new_hv_state
14610 if self.op.disk_state:
14611 self.group.disk_state_static = self.new_disk_state
14613 if self.op.ipolicy:
14614 self.group.ipolicy = self.new_ipolicy
14616 self.cfg.Update(self.group, feedback_fn)
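    # Descriptive note (not part of the original source): the "result" list of
    # (parameter name, new value) pairs accumulated above is what this LU hands
    # back to the caller, e.g. something roughly like
    #   [("ndparams", "{'oob_program': None}"), ("diskparams", "{...}")]
    # (values here are purely illustrative).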
14620 class LUGroupRemove(LogicalUnit):
14621 HPATH = "group-remove"
14622 HTYPE = constants.HTYPE_GROUP
14625 def ExpandNames(self):
14626     # This raises errors.OpPrereqError on its own:
14627 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14628 self.needed_locks = {
14629 locking.LEVEL_NODEGROUP: [self.group_uuid],
14632 def CheckPrereq(self):
14633 """Check prerequisites.
14635     This checks that the given group name exists as a node group, that it is
14636     empty (i.e., contains no nodes), and that it is not the last group of the cluster.
14640 # Verify that the group is empty.
14641 group_nodes = [node.name
14642 for node in self.cfg.GetAllNodesInfo().values()
14643 if node.group == self.group_uuid]
14646 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14648 (self.op.group_name,
14649 utils.CommaJoin(utils.NiceSort(group_nodes))),
14650 errors.ECODE_STATE)
14652 # Verify the cluster would not be left group-less.
14653 if len(self.cfg.GetNodeGroupList()) == 1:
14654 raise errors.OpPrereqError("Group '%s' is the only group,"
14655 " cannot be removed" %
14656 self.op.group_name,
14657 errors.ECODE_STATE)
14659 def BuildHooksEnv(self):
14660 """Build hooks env.
14664 "GROUP_NAME": self.op.group_name,
14667 def BuildHooksNodes(self):
14668 """Build hooks nodes.
14671 mn = self.cfg.GetMasterNode()
14672 return ([mn], [mn])
14674 def Exec(self, feedback_fn):
14675 """Remove the node group.
14679 self.cfg.RemoveNodeGroup(self.group_uuid)
14680 except errors.ConfigurationError:
14681 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14682 (self.op.group_name, self.group_uuid))
14684 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14687 class LUGroupRename(LogicalUnit):
14688 HPATH = "group-rename"
14689 HTYPE = constants.HTYPE_GROUP
14692 def ExpandNames(self):
14693 # This raises errors.OpPrereqError on its own:
14694 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14696 self.needed_locks = {
14697 locking.LEVEL_NODEGROUP: [self.group_uuid],
14700 def CheckPrereq(self):
14701 """Check prerequisites.
14703 Ensures requested new name is not yet used.
14707 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14708 except errors.OpPrereqError:
14711 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14712 " node group (UUID: %s)" %
14713 (self.op.new_name, new_name_uuid),
14714 errors.ECODE_EXISTS)
14716 def BuildHooksEnv(self):
14717 """Build hooks env.
14721 "OLD_NAME": self.op.group_name,
14722 "NEW_NAME": self.op.new_name,
14725 def BuildHooksNodes(self):
14726 """Build hooks nodes.
14729 mn = self.cfg.GetMasterNode()
14731 all_nodes = self.cfg.GetAllNodesInfo()
14732 all_nodes.pop(mn, None)
14735 run_nodes.extend(node.name for node in all_nodes.values()
14736 if node.group == self.group_uuid)
14738 return (run_nodes, run_nodes)
14740 def Exec(self, feedback_fn):
14741 """Rename the node group.
14744 group = self.cfg.GetNodeGroup(self.group_uuid)
14747 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14748 (self.op.group_name, self.group_uuid))
14750 group.name = self.op.new_name
14751 self.cfg.Update(group, feedback_fn)
14753 return self.op.new_name
14756 class LUGroupEvacuate(LogicalUnit):
14757 HPATH = "group-evacuate"
14758 HTYPE = constants.HTYPE_GROUP
14761 def ExpandNames(self):
14762 # This raises errors.OpPrereqError on its own:
14763 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14765 if self.op.target_groups:
14766 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14767 self.op.target_groups)
14769 self.req_target_uuids = []
14771 if self.group_uuid in self.req_target_uuids:
14772 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14773 " as a target group (targets are %s)" %
14775 utils.CommaJoin(self.req_target_uuids)),
14776 errors.ECODE_INVAL)
14778 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14780 self.share_locks = _ShareAll()
14781 self.needed_locks = {
14782 locking.LEVEL_INSTANCE: [],
14783 locking.LEVEL_NODEGROUP: [],
14784 locking.LEVEL_NODE: [],
14787 def DeclareLocks(self, level):
14788 if level == locking.LEVEL_INSTANCE:
14789 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14791 # Lock instances optimistically, needs verification once node and group
14792 # locks have been acquired
14793 self.needed_locks[locking.LEVEL_INSTANCE] = \
14794 self.cfg.GetNodeGroupInstances(self.group_uuid)
14796 elif level == locking.LEVEL_NODEGROUP:
14797 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14799 if self.req_target_uuids:
14800 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14802 # Lock all groups used by instances optimistically; this requires going
14803 # via the node before it's locked, requiring verification later on
14804 lock_groups.update(group_uuid
14805 for instance_name in
14806 self.owned_locks(locking.LEVEL_INSTANCE)
14808 self.cfg.GetInstanceNodeGroups(instance_name))
14810 # No target groups, need to lock all of them
14811 lock_groups = locking.ALL_SET
14813 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14815 elif level == locking.LEVEL_NODE:
14816 # This will only lock the nodes in the group to be evacuated which
14817 # contain actual instances
14818 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14819 self._LockInstancesNodes()
14821 # Lock all nodes in group to be evacuated and target groups
14822 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14823 assert self.group_uuid in owned_groups
14824 member_nodes = [node_name
14825 for group in owned_groups
14826 for node_name in self.cfg.GetNodeGroup(group).members]
14827 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
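  # Descriptive note (not part of the original source): the instance and
  # node-group locks above are acquired "optimistically", i.e. based on
  # configuration data read before the locks were actually held.  CheckPrereq
  # below therefore re-validates them via _CheckNodeGroupInstances and
  # _CheckInstancesNodeGroups once everything is locked.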
14829 def CheckPrereq(self):
14830 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14831 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14832 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14834 assert owned_groups.issuperset(self.req_target_uuids)
14835 assert self.group_uuid in owned_groups
14837 # Check if locked instances are still correct
14838 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14840 # Get instance information
14841 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14843 # Check if node groups for locked instances are still correct
14844 _CheckInstancesNodeGroups(self.cfg, self.instances,
14845 owned_groups, owned_nodes, self.group_uuid)
14847 if self.req_target_uuids:
14848 # User requested specific target groups
14849 self.target_uuids = self.req_target_uuids
14851 # All groups except the one to be evacuated are potential targets
14852 self.target_uuids = [group_uuid for group_uuid in owned_groups
14853 if group_uuid != self.group_uuid]
14855 if not self.target_uuids:
14856 raise errors.OpPrereqError("There are no possible target groups",
14857 errors.ECODE_INVAL)
14859 def BuildHooksEnv(self):
14860 """Build hooks env.
14864 "GROUP_NAME": self.op.group_name,
14865 "TARGET_GROUPS": " ".join(self.target_uuids),
14868 def BuildHooksNodes(self):
14869 """Build hooks nodes.
14872 mn = self.cfg.GetMasterNode()
14874 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14876 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14878 return (run_nodes, run_nodes)
14880 def Exec(self, feedback_fn):
14881 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14883 assert self.group_uuid not in self.target_uuids
14885 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14886 instances=instances, target_groups=self.target_uuids)
14888 ial.Run(self.op.iallocator)
14890 if not ial.success:
14891 raise errors.OpPrereqError("Can't compute group evacuation using"
14892 " iallocator '%s': %s" %
14893 (self.op.iallocator, ial.info),
14894 errors.ECODE_NORES)
14896 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14898 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14899 len(jobs), self.op.group_name)
14901 return ResultWithJobs(jobs)
14904 class TagsLU(NoHooksLU): # pylint: disable=W0223
14905 """Generic tags LU.
14907 This is an abstract class which is the parent of all the other tags LUs.
14910 def ExpandNames(self):
14911 self.group_uuid = None
14912 self.needed_locks = {}
14914 if self.op.kind == constants.TAG_NODE:
14915 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14916 lock_level = locking.LEVEL_NODE
14917 lock_name = self.op.name
14918 elif self.op.kind == constants.TAG_INSTANCE:
14919 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14920 lock_level = locking.LEVEL_INSTANCE
14921 lock_name = self.op.name
14922 elif self.op.kind == constants.TAG_NODEGROUP:
14923 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14924 lock_level = locking.LEVEL_NODEGROUP
14925 lock_name = self.group_uuid
14926 elif self.op.kind == constants.TAG_NETWORK:
14927 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
14928 lock_level = locking.LEVEL_NETWORK
14929 lock_name = self.network_uuid
14934 if lock_level and getattr(self.op, "use_locking", True):
14935 self.needed_locks[lock_level] = lock_name
14937 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14938 # not possible to acquire the BGL based on opcode parameters)
14940 def CheckPrereq(self):
14941 """Check prerequisites.
14944 if self.op.kind == constants.TAG_CLUSTER:
14945 self.target = self.cfg.GetClusterInfo()
14946 elif self.op.kind == constants.TAG_NODE:
14947 self.target = self.cfg.GetNodeInfo(self.op.name)
14948 elif self.op.kind == constants.TAG_INSTANCE:
14949 self.target = self.cfg.GetInstanceInfo(self.op.name)
14950 elif self.op.kind == constants.TAG_NODEGROUP:
14951 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14952 elif self.op.kind == constants.TAG_NETWORK:
14953 self.target = self.cfg.GetNetwork(self.network_uuid)
14955 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14956 str(self.op.kind), errors.ECODE_INVAL)
14959 class LUTagsGet(TagsLU):
14960 """Returns the tags of a given object.
14965 def ExpandNames(self):
14966 TagsLU.ExpandNames(self)
14968 # Share locks as this is only a read operation
14969 self.share_locks = _ShareAll()
14971 def Exec(self, feedback_fn):
14972 """Returns the tag list.
14975 return list(self.target.GetTags())
14978 class LUTagsSearch(NoHooksLU):
14979 """Searches the tags for a given pattern.
14984 def ExpandNames(self):
14985 self.needed_locks = {}
14987 def CheckPrereq(self):
14988 """Check prerequisites.
14990 This checks the pattern passed for validity by compiling it.
14994 self.re = re.compile(self.op.pattern)
14995 except re.error, err:
14996 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14997 (self.op.pattern, err), errors.ECODE_INVAL)
14999 def Exec(self, feedback_fn):
15000     """Returns the matching (path, tag) pairs.
15004 tgts = [("/cluster", cfg.GetClusterInfo())]
15005 ilist = cfg.GetAllInstancesInfo().values()
15006 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15007 nlist = cfg.GetAllNodesInfo().values()
15008 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15009 tgts.extend(("/nodegroup/%s" % n.name, n)
15010 for n in cfg.GetAllNodeGroupsInfo().values())
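    # Sketch of the data being searched (hypothetical names): tgts is a list of
    # (path, taggable object) pairs such as
    #   [("/cluster", <Cluster>), ("/instances/web1", <Instance web1>),
    #    ("/nodes/node1", <Node node1>), ("/nodegroup/default", <NodeGroup>)]
    # and every tag of every target is matched against self.re below.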
15012 for path, target in tgts:
15013 for tag in target.GetTags():
15014 if self.re.search(tag):
15015 results.append((path, tag))
15019 class LUTagsSet(TagsLU):
15020 """Sets a tag on a given object.
15025 def CheckPrereq(self):
15026 """Check prerequisites.
15028 This checks the type and length of the tag name and value.
15031 TagsLU.CheckPrereq(self)
15032 for tag in self.op.tags:
15033 objects.TaggableObject.ValidateTag(tag)
15035 def Exec(self, feedback_fn):
15040 for tag in self.op.tags:
15041 self.target.AddTag(tag)
15042 except errors.TagError, err:
15043 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15044 self.cfg.Update(self.target, feedback_fn)
15047 class LUTagsDel(TagsLU):
15048 """Delete a list of tags from a given object.
15053 def CheckPrereq(self):
15054 """Check prerequisites.
15056 This checks that we have the given tag.
15059 TagsLU.CheckPrereq(self)
15060 for tag in self.op.tags:
15061 objects.TaggableObject.ValidateTag(tag)
15062 del_tags = frozenset(self.op.tags)
15063 cur_tags = self.target.GetTags()
15065 diff_tags = del_tags - cur_tags
15067 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15068 raise errors.OpPrereqError("Tag(s) %s not found" %
15069 (utils.CommaJoin(diff_names), ),
15070 errors.ECODE_NOENT)
15072 def Exec(self, feedback_fn):
15073 """Remove the tag from the object.
15076 for tag in self.op.tags:
15077 self.target.RemoveTag(tag)
15078 self.cfg.Update(self.target, feedback_fn)
15081 class LUTestDelay(NoHooksLU):
15082 """Sleep for a specified amount of time.
15084   This LU sleeps on the master and/or nodes for a specified amount of time.
15090 def ExpandNames(self):
15091 """Expand names and set required locks.
15093 This expands the node list, if any.
15096 self.needed_locks = {}
15097 if self.op.on_nodes:
15098 # _GetWantedNodes can be used here, but is not always appropriate to use
15099 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15100 # more information.
15101 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15102 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15104 def _TestDelay(self):
15105 """Do the actual sleep.
15108 if self.op.on_master:
15109 if not utils.TestDelay(self.op.duration):
15110 raise errors.OpExecError("Error during master delay test")
15111 if self.op.on_nodes:
15112 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15113 for node, node_result in result.items():
15114 node_result.Raise("Failure during rpc call to node %s" % node)
15116 def Exec(self, feedback_fn):
15117 """Execute the test delay opcode, with the wanted repetitions.
15120 if self.op.repeat == 0:
15123 top_value = self.op.repeat - 1
15124 for i in range(self.op.repeat):
15125 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15129 class LUTestJqueue(NoHooksLU):
15130 """Utility LU to test some aspects of the job queue.
15135 # Must be lower than default timeout for WaitForJobChange to see whether it
15136 # notices changed jobs
15137 _CLIENT_CONNECT_TIMEOUT = 20.0
15138 _CLIENT_CONFIRM_TIMEOUT = 60.0
15141 def _NotifyUsingSocket(cls, cb, errcls):
15142 """Opens a Unix socket and waits for another program to connect.
15145 @param cb: Callback to send socket name to client
15146 @type errcls: class
15147 @param errcls: Exception class to use for errors
15150 # Using a temporary directory as there's no easy way to create temporary
15151     # sockets without writing a custom loop around tempfile.mktemp and socket.bind.
15153 tmpdir = tempfile.mkdtemp()
15155 tmpsock = utils.PathJoin(tmpdir, "sock")
15157 logging.debug("Creating temporary socket at %s", tmpsock)
15158 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15163 # Send details to client
15166 # Wait for client to connect before continuing
15167 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15169 (conn, _) = sock.accept()
15170 except socket.error, err:
15171 raise errcls("Client didn't connect in time (%s)" % err)
15175 # Remove as soon as client is connected
15176 shutil.rmtree(tmpdir)
15178 # Wait for client to close
15181 # pylint: disable=E1101
15182 # Instance of '_socketobject' has no ... member
15183 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15185 except socket.error, err:
15186 raise errcls("Client failed to confirm notification (%s)" % err)
15190 def _SendNotification(self, test, arg, sockname):
15191 """Sends a notification to the client.
15194 @param test: Test name
15195 @param arg: Test argument (depends on test)
15196 @type sockname: string
15197 @param sockname: Socket path
15200 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15202 def _Notify(self, prereq, test, arg):
15203 """Notifies the client of a test.
15206 @param prereq: Whether this is a prereq-phase test
15208 @param test: Test name
15209 @param arg: Test argument (depends on test)
15213 errcls = errors.OpPrereqError
15215 errcls = errors.OpExecError
15217 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15221 def CheckArguments(self):
15222 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15223 self.expandnames_calls = 0
15225 def ExpandNames(self):
15226 checkargs_calls = getattr(self, "checkargs_calls", 0)
15227 if checkargs_calls < 1:
15228 raise errors.ProgrammerError("CheckArguments was not called")
15230 self.expandnames_calls += 1
15232 if self.op.notify_waitlock:
15233 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15235 self.LogInfo("Expanding names")
15237 # Get lock on master node (just to get a lock, not for a particular reason)
15238 self.needed_locks = {
15239 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15242 def Exec(self, feedback_fn):
15243 if self.expandnames_calls < 1:
15244 raise errors.ProgrammerError("ExpandNames was not called")
15246 if self.op.notify_exec:
15247 self._Notify(False, constants.JQT_EXEC, None)
15249 self.LogInfo("Executing")
15251 if self.op.log_messages:
15252 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15253 for idx, msg in enumerate(self.op.log_messages):
15254 self.LogInfo("Sending log message %s", idx + 1)
15255 feedback_fn(constants.JQT_MSGPREFIX + msg)
15256 # Report how many test messages have been sent
15257 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15260 raise errors.OpExecError("Opcode failure was requested")
15265 class IAllocator(object):
15266 """IAllocator framework.
15268   An IAllocator instance has four sets of attributes:
15269 - cfg that is needed to query the cluster
15270 - input data (all members of the _KEYS class attribute are required)
15271 - four buffer attributes (in|out_data|text), that represent the
15272 input (to the external script) in text and data structure format,
15273 and the output from it, again in two formats
15274     - the result variables from the script (success, info, nodes) for easy usage
15278 # pylint: disable=R0902
15279 # lots of instance attributes
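  # Typical usage, as seen e.g. in LUGroupEvacuate.Exec above (sketch only,
  # "allocator_name" is a placeholder):
  #   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(allocator_name)
  #   if not ial.success:
  #     ...  # ial.info carries the error message
  #   ...    # ial.result holds the validated allocator output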
15281 def __init__(self, cfg, rpc_runner, mode, **kwargs):
15283 self.rpc = rpc_runner
15284 # init buffer variables
15285 self.in_text = self.out_text = self.in_data = self.out_data = None
15286 # init all input fields so that pylint is happy
15288 self.memory = self.disks = self.disk_template = self.spindle_use = None
15289 self.os = self.tags = self.nics = self.vcpus = None
15290 self.hypervisor = None
15291 self.relocate_from = None
15293 self.instances = None
15294 self.evac_mode = None
15295 self.target_groups = []
15297 self.required_nodes = None
15298 # init result fields
15299 self.success = self.info = self.result = None
15302 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
15304 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
15305 " IAllocator" % self.mode)
15307 keyset = [n for (n, _) in keydata]
15310 if key not in keyset:
15311 raise errors.ProgrammerError("Invalid input parameter '%s' to"
15312 " IAllocator" % key)
15313 setattr(self, key, kwargs[key])
15316 if key not in kwargs:
15317 raise errors.ProgrammerError("Missing input parameter '%s' to"
15318 " IAllocator" % key)
15319 self._BuildInputData(compat.partial(fn, self), keydata)
15321 def _ComputeClusterData(self):
15322 """Compute the generic allocator input data.
15324 This is the data that is independent of the actual operation.
15328 cluster_info = cfg.GetClusterInfo()
15331 "version": constants.IALLOCATOR_VERSION,
15332 "cluster_name": cfg.GetClusterName(),
15333 "cluster_tags": list(cluster_info.GetTags()),
15334 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
15335 "ipolicy": cluster_info.ipolicy,
15337 ninfo = cfg.GetAllNodesInfo()
15338 iinfo = cfg.GetAllInstancesInfo().values()
15339 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
15342 node_list = [n.name for n in ninfo.values() if n.vm_capable]
15344 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
15345 hypervisor_name = self.hypervisor
15346 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
15347 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
15349 hypervisor_name = cluster_info.primary_hypervisor
15351 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
15354 self.rpc.call_all_instances_info(node_list,
15355 cluster_info.enabled_hypervisors)
15357 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
15359 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
15360 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
15361 i_list, config_ndata)
15362 assert len(data["nodes"]) == len(ninfo), \
15363 "Incomplete node data computed"
15365 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
15367 self.in_data = data
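    # Rough shape of the computed structure (a sketch of the keys built above):
    #   {
    #     "version": ..., "cluster_name": ..., "cluster_tags": [...],
    #     "enabled_hypervisors": [...], "ipolicy": {...},
    #     "nodegroups": {uuid: {...}}, "nodes": {name: {...}},
    #     "instances": {name: {...}},
    #   }
    # The mode-specific "request" key is added later by _BuildInputData.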
15370 def _ComputeNodeGroupData(cfg):
15371 """Compute node groups data.
15374 cluster = cfg.GetClusterInfo()
15375 ng = dict((guuid, {
15376 "name": gdata.name,
15377 "alloc_policy": gdata.alloc_policy,
15378 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
15380 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
15385 def _ComputeBasicNodeData(cfg, node_cfg):
15386 """Compute global node data.
15389 @returns: a dict of name: (node dict, node config)
15392 # fill in static (config-based) values
15393 node_results = dict((ninfo.name, {
15394 "tags": list(ninfo.GetTags()),
15395 "primary_ip": ninfo.primary_ip,
15396 "secondary_ip": ninfo.secondary_ip,
15397 "offline": ninfo.offline,
15398 "drained": ninfo.drained,
15399 "master_candidate": ninfo.master_candidate,
15400 "group": ninfo.group,
15401 "master_capable": ninfo.master_capable,
15402 "vm_capable": ninfo.vm_capable,
15403 "ndparams": cfg.GetNdParams(ninfo),
15405 for ninfo in node_cfg.values())
15407 return node_results
15410 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
15412 """Compute global node data.
15414 @param node_results: the basic node structures as filled from the config
15417 #TODO(dynmem): compute the right data on MAX and MIN memory
15418 # make a copy of the current dict
15419 node_results = dict(node_results)
15420 for nname, nresult in node_data.items():
15421 assert nname in node_results, "Missing basic data for node %s" % nname
15422 ninfo = node_cfg[nname]
15424 if not (ninfo.offline or ninfo.drained):
15425 nresult.Raise("Can't get data for node %s" % nname)
15426 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15428 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15430 for attr in ["memory_total", "memory_free", "memory_dom0",
15431 "vg_size", "vg_free", "cpu_total"]:
15432 if attr not in remote_info:
15433 raise errors.OpExecError("Node '%s' didn't return attribute"
15434 " '%s'" % (nname, attr))
15435 if not isinstance(remote_info[attr], int):
15436 raise errors.OpExecError("Node '%s' returned invalid value"
15438 (nname, attr, remote_info[attr]))
15439 # compute memory used by primary instances
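        # Worked example of the adjustment below (hypothetical numbers): if a
        # running primary instance has BE_MAXMEM = 1024 MiB but the hypervisor
        # reports it currently using 768 MiB, memory_free is reduced by
        # max(0, 1024 - 768) = 256 MiB, so the allocator also sees the memory
        # the instance could still grow into as unavailable.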
15440 i_p_mem = i_p_up_mem = 0
15441 for iinfo, beinfo in i_list:
15442 if iinfo.primary_node == nname:
15443 i_p_mem += beinfo[constants.BE_MAXMEM]
15444 if iinfo.name not in node_iinfo[nname].payload:
15447 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15448 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15449 remote_info["memory_free"] -= max(0, i_mem_diff)
15451 if iinfo.admin_state == constants.ADMINST_UP:
15452 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15454 # compute memory used by instances
15456 "total_memory": remote_info["memory_total"],
15457 "reserved_memory": remote_info["memory_dom0"],
15458 "free_memory": remote_info["memory_free"],
15459 "total_disk": remote_info["vg_size"],
15460 "free_disk": remote_info["vg_free"],
15461 "total_cpus": remote_info["cpu_total"],
15462 "i_pri_memory": i_p_mem,
15463 "i_pri_up_memory": i_p_up_mem,
15465 pnr_dyn.update(node_results[nname])
15466 node_results[nname] = pnr_dyn
15468 return node_results
15471 def _ComputeInstanceData(cluster_info, i_list):
15472 """Compute global instance data.
15476 for iinfo, beinfo in i_list:
15478 for nic in iinfo.nics:
15479 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15483 "mode": filled_params[constants.NIC_MODE],
15484 "link": filled_params[constants.NIC_LINK],
15485 "network": nic.network,
15487 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15488 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15489 nic_data.append(nic_dict)
15491 "tags": list(iinfo.GetTags()),
15492 "admin_state": iinfo.admin_state,
15493 "vcpus": beinfo[constants.BE_VCPUS],
15494 "memory": beinfo[constants.BE_MAXMEM],
15495 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15497 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15499 "disks": [{constants.IDISK_SIZE: dsk.size,
15500 constants.IDISK_MODE: dsk.mode}
15501 for dsk in iinfo.disks],
15502 "disk_template": iinfo.disk_template,
15503 "hypervisor": iinfo.hypervisor,
15505 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15507 instance_data[iinfo.name] = pir
15509 return instance_data
15511 def _AddNewInstance(self):
15512 """Add new instance data to allocator structure.
15514     This in combination with _ComputeClusterData will create the
15515     correct structure needed as input for the allocator.
15517     The checks for the completeness of the opcode must have already been done.
15521 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15523 if self.disk_template in constants.DTS_INT_MIRROR:
15524 self.required_nodes = 2
15526 self.required_nodes = 1
15530 "disk_template": self.disk_template,
15533 "vcpus": self.vcpus,
15534 "memory": self.memory,
15535 "spindle_use": self.spindle_use,
15536 "disks": self.disks,
15537 "disk_space_total": disk_space,
15539 "required_nodes": self.required_nodes,
15540 "hypervisor": self.hypervisor,
15545 def _AddRelocateInstance(self):
15546 """Add relocate instance data to allocator structure.
15548     This in combination with _ComputeClusterData will create the
15549     correct structure needed as input for the allocator.
15551     The checks for the completeness of the opcode must have already been done.
15555 instance = self.cfg.GetInstanceInfo(self.name)
15556 if instance is None:
15557 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15558 " IAllocator" % self.name)
15560 if instance.disk_template not in constants.DTS_MIRRORED:
15561 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15562 errors.ECODE_INVAL)
15564 if instance.disk_template in constants.DTS_INT_MIRROR and \
15565 len(instance.secondary_nodes) != 1:
15566       raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15567 errors.ECODE_STATE)
15569 self.required_nodes = 1
15570 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15571 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15575 "disk_space_total": disk_space,
15576 "required_nodes": self.required_nodes,
15577 "relocate_from": self.relocate_from,
15581 def _AddNodeEvacuate(self):
15582 """Get data for node-evacuate requests.
15586 "instances": self.instances,
15587 "evac_mode": self.evac_mode,
15590 def _AddChangeGroup(self):
15591     """Get data for change-group requests.
15595 "instances": self.instances,
15596 "target_groups": self.target_groups,
15599 def _BuildInputData(self, fn, keydata):
15600 """Build input data structures.
15603 self._ComputeClusterData()
15606 request["type"] = self.mode
15607 for keyname, keytype in keydata:
15608 if keyname not in request:
15609 raise errors.ProgrammerError("Request parameter %s is missing" %
15611 val = request[keyname]
15612 if not keytype(val):
15613 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15614 " validation, value %s, expected"
15615 " type %s" % (keyname, val, keytype))
15616 self.in_data["request"] = request
15618 self.in_text = serializer.Dump(self.in_data)
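    # For example, an IALLOCATOR_MODE_ALLOC request ends up with a "request"
    # sub-dict roughly like the following (purely illustrative values):
    #   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
    #    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
    #    "disk_space_total": ..., "required_nodes": ..., ...}
    # which is serialized into self.in_text for the external allocator script.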
15620 _STRING_LIST = ht.TListOf(ht.TString)
15621 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15622 # pylint: disable=E1101
15623 # Class '...' has no 'OP_ID' member
15624 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15625 opcodes.OpInstanceMigrate.OP_ID,
15626 opcodes.OpInstanceReplaceDisks.OP_ID])
15630 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15631 ht.TItems([ht.TNonEmptyString,
15632 ht.TNonEmptyString,
15633 ht.TListOf(ht.TNonEmptyString),
15636 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15637 ht.TItems([ht.TNonEmptyString,
15640 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15641 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
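  # A valid node-evacuation / change-group result therefore looks roughly like
  # (hypothetical names):
  #   [
  #     [["inst1", "group2", ["node3", "node4"]]],   # moved instances
  #     [["inst2", "some failure reason"]],          # failed instances
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],   # jobs (opcode dicts)
  #   ]
  # i.e. a 3-element list matching _NEVAC_RESULT above.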
15644 constants.IALLOCATOR_MODE_ALLOC:
15647 ("name", ht.TString),
15648 ("memory", ht.TInt),
15649 ("spindle_use", ht.TInt),
15650 ("disks", ht.TListOf(ht.TDict)),
15651 ("disk_template", ht.TString),
15652 ("os", ht.TString),
15653 ("tags", _STRING_LIST),
15654 ("nics", ht.TListOf(ht.TDict)),
15655 ("vcpus", ht.TInt),
15656 ("hypervisor", ht.TString),
15658 constants.IALLOCATOR_MODE_RELOC:
15659 (_AddRelocateInstance,
15660 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15662 constants.IALLOCATOR_MODE_NODE_EVAC:
15663 (_AddNodeEvacuate, [
15664 ("instances", _STRING_LIST),
15665 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15667 constants.IALLOCATOR_MODE_CHG_GROUP:
15668 (_AddChangeGroup, [
15669 ("instances", _STRING_LIST),
15670 ("target_groups", _STRING_LIST),
15674 def Run(self, name, validate=True, call_fn=None):
15675 """Run an instance allocator and return the results.
15678 if call_fn is None:
15679 call_fn = self.rpc.call_iallocator_runner
15681 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15682 result.Raise("Failure while running the iallocator script")
15684 self.out_text = result.payload
15686 self._ValidateResult()
15688 def _ValidateResult(self):
15689 """Process the allocator results.
15691     This will process the allocator results and, if successful, save them in
15692     self.out_data and the other result attributes (success, info, result).
15696 rdict = serializer.Load(self.out_text)
15697 except Exception, err:
15698 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15700 if not isinstance(rdict, dict):
15701 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15703   # TODO: remove backwards compatibility in later versions
15704 if "nodes" in rdict and "result" not in rdict:
15705 rdict["result"] = rdict["nodes"]
15708 for key in "success", "info", "result":
15709 if key not in rdict:
15710 raise errors.OpExecError("Can't parse iallocator results:"
15711 " missing key '%s'" % key)
15712 setattr(self, key, rdict[key])
15714 if not self._result_check(self.result):
15715 raise errors.OpExecError("Iallocator returned invalid result,"
15716 " expected %s, got %s" %
15717 (self._result_check, self.result),
15718 errors.ECODE_INVAL)
15720 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15721 assert self.relocate_from is not None
15722 assert self.required_nodes == 1
15724 node2group = dict((name, ndata["group"])
15725 for (name, ndata) in self.in_data["nodes"].items())
15727 fn = compat.partial(self._NodesToGroups, node2group,
15728 self.in_data["nodegroups"])
15730 instance = self.cfg.GetInstanceInfo(self.name)
15731 request_groups = fn(self.relocate_from + [instance.primary_node])
15732 result_groups = fn(rdict["result"] + [instance.primary_node])
15734 if self.success and not set(result_groups).issubset(request_groups):
15735 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15736 " differ from original groups (%s)" %
15737 (utils.CommaJoin(result_groups),
15738 utils.CommaJoin(request_groups)))
15740 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15741 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15743 self.out_data = rdict
15746 def _NodesToGroups(node2group, groups, nodes):
15747 """Returns a list of unique group names for a list of nodes.
15749 @type node2group: dict
15750 @param node2group: Map from node name to group UUID
15752 @param groups: Group information
15754 @param nodes: Node names
15761 group_uuid = node2group[node]
15763 # Ignore unknown node
15767 group = groups[group_uuid]
15769 # Can't find group, let's use UUID
15770 group_name = group_uuid
15772 group_name = group["name"]
15774 result.add(group_name)
15776 return sorted(result)
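    # Tiny example (hypothetical data):
    #   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node9": "uuid-b"}
    #   groups     = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "ssd"}}
    #   _NodesToGroups(node2group, groups, ["node1", "node2", "unknown"])
    #     -> ["default"]
    # Unknown nodes are skipped; group names are de-duplicated and sorted.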
15779 class LUTestAllocator(NoHooksLU):
15780 """Run allocator tests.
15782 This LU runs the allocator tests
15785 def CheckPrereq(self):
15786 """Check prerequisites.
15788     This checks the opcode parameters depending on the direction and mode of the test.
15791 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15792 for attr in ["memory", "disks", "disk_template",
15793 "os", "tags", "nics", "vcpus"]:
15794 if not hasattr(self.op, attr):
15795 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15796 attr, errors.ECODE_INVAL)
15797 iname = self.cfg.ExpandInstanceName(self.op.name)
15798 if iname is not None:
15799 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15800 iname, errors.ECODE_EXISTS)
15801 if not isinstance(self.op.nics, list):
15802 raise errors.OpPrereqError("Invalid parameter 'nics'",
15803 errors.ECODE_INVAL)
15804 if not isinstance(self.op.disks, list):
15805 raise errors.OpPrereqError("Invalid parameter 'disks'",
15806 errors.ECODE_INVAL)
15807 for row in self.op.disks:
15808 if (not isinstance(row, dict) or
15809 constants.IDISK_SIZE not in row or
15810 not isinstance(row[constants.IDISK_SIZE], int) or
15811 constants.IDISK_MODE not in row or
15812 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15813 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15814 " parameter", errors.ECODE_INVAL)
15815 if self.op.hypervisor is None:
15816 self.op.hypervisor = self.cfg.GetHypervisorType()
15817 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15818 fname = _ExpandInstanceName(self.cfg, self.op.name)
15819 self.op.name = fname
15820 self.relocate_from = \
15821 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15822 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15823 constants.IALLOCATOR_MODE_NODE_EVAC):
15824 if not self.op.instances:
15825 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15826 self.op.instances = _GetWantedInstances(self, self.op.instances)
15828 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15829 self.op.mode, errors.ECODE_INVAL)
15831 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15832 if self.op.allocator is None:
15833 raise errors.OpPrereqError("Missing allocator name",
15834 errors.ECODE_INVAL)
15835 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15836 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15837 self.op.direction, errors.ECODE_INVAL)
15839 def Exec(self, feedback_fn):
15840 """Run the allocator test.
15843 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15844 ial = IAllocator(self.cfg, self.rpc,
15847 memory=self.op.memory,
15848 disks=self.op.disks,
15849 disk_template=self.op.disk_template,
15853 vcpus=self.op.vcpus,
15854 hypervisor=self.op.hypervisor,
15856 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15857 ial = IAllocator(self.cfg, self.rpc,
15860 relocate_from=list(self.relocate_from),
15862 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15863 ial = IAllocator(self.cfg, self.rpc,
15865 instances=self.op.instances,
15866 target_groups=self.op.target_groups)
15867 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15868 ial = IAllocator(self.cfg, self.rpc,
15870 instances=self.op.instances,
15871 evac_mode=self.op.evac_mode)
15873       raise errors.ProgrammerError("Unhandled mode %s in"
15874 " LUTestAllocator.Exec", self.op.mode)
15876 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15877 result = ial.in_text
15879 ial.Run(self.op.allocator, validate=False)
15880 result = ial.out_text
15884 class LUNetworkAdd(LogicalUnit):
15885 """Logical unit for creating networks.
15888 HPATH = "network-add"
15889 HTYPE = constants.HTYPE_NETWORK
15892 def BuildHooksNodes(self):
15893 """Build hooks nodes.
15896 mn = self.cfg.GetMasterNode()
15897 return ([mn], [mn])
15899 def ExpandNames(self):
15900 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15901 self.needed_locks = {}
15902 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15904 def CheckPrereq(self):
15905 """Check prerequisites.
15907     This checks that the network is given and is not already defined.
15911 if self.op.network is None:
15912 raise errors.OpPrereqError("Network must be given",
15913 errors.ECODE_INVAL)
15915 uuid = self.cfg.LookupNetwork(self.op.network_name)
15918 raise errors.OpPrereqError("Network '%s' already defined" %
15919 self.op.network, errors.ECODE_EXISTS)
15921 if self.op.mac_prefix:
15922 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
15924 # Check tag validity
15925 for tag in self.op.tags:
15926 objects.TaggableObject.ValidateTag(tag)
15929 def BuildHooksEnv(self):
15930 """Build hooks env.
15934 "name": self.op.network_name,
15935 "network": self.op.network,
15936 "gateway": self.op.gateway,
15937 "network6": self.op.network6,
15938 "gateway6": self.op.gateway6,
15939 "mac_prefix": self.op.mac_prefix,
15940 "network_type": self.op.network_type,
15941 "tags": self.op.tags,
15943 return _BuildNetworkHookEnv(**args)
15945 def Exec(self, feedback_fn):
15946 """Add the ip pool to the cluster.
15949 nobj = objects.Network(name=self.op.network_name,
15950 network=self.op.network,
15951 gateway=self.op.gateway,
15952 network6=self.op.network6,
15953 gateway6=self.op.gateway6,
15954 mac_prefix=self.op.mac_prefix,
15955 network_type=self.op.network_type,
15956 uuid=self.network_uuid,
15958 # Initialize the associated address pool
15960 pool = network.AddressPool.InitializeNetwork(nobj)
15961 except errors.AddressPoolError, e:
15962 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15964 # Check if we need to reserve the nodes and the cluster master IP
15965 # These may not be allocated to any instances in routed mode, as
15966 # they wouldn't function anyway.
15967 for node in self.cfg.GetAllNodesInfo().values():
15968 for ip in [node.primary_ip, node.secondary_ip]:
15971 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15973 except errors.AddressPoolError:
15976 master_ip = self.cfg.GetClusterInfo().master_ip
15978 pool.Reserve(master_ip)
15979 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15980 except errors.AddressPoolError:
15983 if self.op.add_reserved_ips:
15984 for ip in self.op.add_reserved_ips:
15986 pool.Reserve(ip, external=True)
15987 except errors.AddressPoolError, e:
15988 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15991 for tag in self.op.tags:
15994 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15995 del self.remove_locks[locking.LEVEL_NETWORK]
15998 class LUNetworkRemove(LogicalUnit):
15999 HPATH = "network-remove"
16000 HTYPE = constants.HTYPE_NETWORK
16003 def ExpandNames(self):
16004 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16006 self.needed_locks = {
16007 locking.LEVEL_NETWORK: [self.network_uuid],
16011 def CheckPrereq(self):
16012 """Check prerequisites.
16014     This checks that the given network name exists as a network and that it
16015     is not connected to any node group.
16019 if not self.network_uuid:
16020 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
16021 errors.ECODE_INVAL)
16023     # Verify that the network is not connected to any node group.
16024 node_groups = [group.name
16025 for group in self.cfg.GetAllNodeGroupsInfo().values()
16026 for network in group.networks.keys()
16027 if network == self.network_uuid]
16030       self.LogWarning("Network '%s' is connected to the following"
16031 " node groups: %s" % (self.op.network_name,
16032 utils.CommaJoin(utils.NiceSort(node_groups))))
16033 raise errors.OpPrereqError("Network still connected",
16034 errors.ECODE_STATE)
16036 def BuildHooksEnv(self):
16037 """Build hooks env.
16041 "NETWORK_NAME": self.op.network_name,
16044 def BuildHooksNodes(self):
16045 """Build hooks nodes.
16048 mn = self.cfg.GetMasterNode()
16049 return ([mn], [mn])
16051 def Exec(self, feedback_fn):
16052 """Remove the network.
16056 self.cfg.RemoveNetwork(self.network_uuid)
16057 except errors.ConfigurationError:
16058 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16059 (self.op.network_name, self.network_uuid))
16062 class LUNetworkSetParams(LogicalUnit):
16063 """Modifies the parameters of a network.
16066 HPATH = "network-modify"
16067 HTYPE = constants.HTYPE_NETWORK
16070 def CheckArguments(self):
16071 if (self.op.gateway and
16072 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16073 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16074 " at once", errors.ECODE_INVAL)
16077 def ExpandNames(self):
16078 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16079 self.network = self.cfg.GetNetwork(self.network_uuid)
16080 self.needed_locks = {
16081 locking.LEVEL_NETWORK: [self.network_uuid],
16085 if self.network is None:
16086 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
16087 (self.op.network_name, self.network_uuid),
16088 errors.ECODE_INVAL)
16090 def CheckPrereq(self):
16091 """Check prerequisites.
16094 self.gateway = self.network.gateway
16095 self.network_type = self.network.network_type
16096 self.mac_prefix = self.network.mac_prefix
16097 self.network6 = self.network.network6
16098 self.gateway6 = self.network.gateway6
16099 self.tags = self.network.tags
16101 self.pool = network.AddressPool(self.network)
16103 if self.op.gateway:
16104 if self.op.gateway == constants.VALUE_NONE:
16105 self.gateway = None
16107 self.gateway = self.op.gateway
16108 if self.pool.IsReserved(self.gateway):
16109 raise errors.OpPrereqError("%s is already reserved" %
16110 self.gateway, errors.ECODE_INVAL)
16112 if self.op.network_type:
16113 if self.op.network_type == constants.VALUE_NONE:
16114 self.network_type = None
16116 self.network_type = self.op.network_type
16118 if self.op.mac_prefix:
16119 if self.op.mac_prefix == constants.VALUE_NONE:
16120 self.mac_prefix = None
16122 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
16123 self.mac_prefix = self.op.mac_prefix
16125 if self.op.gateway6:
16126 if self.op.gateway6 == constants.VALUE_NONE:
16127 self.gateway6 = None
16129 self.gateway6 = self.op.gateway6
16131 if self.op.network6:
16132 if self.op.network6 == constants.VALUE_NONE:
16133 self.network6 = None
16135 self.network6 = self.op.network6
16139 def BuildHooksEnv(self):
16140 """Build hooks env.
16144 "name": self.op.network_name,
16145 "network": self.network.network,
16146 "gateway": self.gateway,
16147 "network6": self.network6,
16148 "gateway6": self.gateway6,
16149 "mac_prefix": self.mac_prefix,
16150 "network_type": self.network_type,
16153 return _BuildNetworkHookEnv(**args)
16155 def BuildHooksNodes(self):
16156 """Build hooks nodes.
16159 mn = self.cfg.GetMasterNode()
16160 return ([mn], [mn])
16162 def Exec(self, feedback_fn):
16163 """Modifies the network.
16166 #TODO: reserve/release via temporary reservation manager
16167 # extend cfg.ReserveIp/ReleaseIp with the external flag
16168 if self.op.gateway:
16169 if self.gateway == self.network.gateway:
16170 self.LogWarning("Gateway is already %s" % self.gateway)
16173 self.pool.Reserve(self.gateway, external=True)
16174 if self.network.gateway:
16175 self.pool.Release(self.network.gateway, external=True)
16176 self.network.gateway = self.gateway
16178 if self.op.add_reserved_ips:
16179 for ip in self.op.add_reserved_ips:
16181 if self.pool.IsReserved(ip):
16182 self.LogWarning("IP %s is already reserved" % ip)
16184 self.pool.Reserve(ip, external=True)
16185 except errors.AddressPoolError, e:
16186 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
16188 if self.op.remove_reserved_ips:
16189 for ip in self.op.remove_reserved_ips:
16190 if ip == self.network.gateway:
16191 self.LogWarning("Cannot unreserve Gateway's IP")
16194 if not self.pool.IsReserved(ip):
16195 self.LogWarning("IP %s is already unreserved" % ip)
16197 self.pool.Release(ip, external=True)
16198 except errors.AddressPoolError, e:
16199 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
16201 if self.op.mac_prefix:
16202 self.network.mac_prefix = self.mac_prefix
16204 if self.op.network6:
16205 self.network.network6 = self.network6
16207 if self.op.gateway6:
16208 self.network.gateway6 = self.gateway6
16210 if self.op.network_type:
16211 self.network.network_type = self.network_type
16213 self.pool.Validate()
16215 self.cfg.Update(self.network, feedback_fn)
16218 class _NetworkQuery(_QueryBase):
16219 FIELDS = query.NETWORK_FIELDS
16221 def ExpandNames(self, lu):
16222 lu.needed_locks = {}
16224 self._all_networks = lu.cfg.GetAllNetworksInfo()
16225 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16228 self.wanted = [name_to_uuid[name]
16229 for name in utils.NiceSort(name_to_uuid.keys())]
16231 # Accept names to be either names or UUIDs.
16234 all_uuid = frozenset(self._all_networks.keys())
16236 for name in self.names:
16237 if name in all_uuid:
16238 self.wanted.append(name)
16239 elif name in name_to_uuid:
16240 self.wanted.append(name_to_uuid[name])
16242 missing.append(name)
16245 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16246 errors.ECODE_NOENT)
16248 def DeclareLocks(self, lu, level):
16251 def _GetQueryData(self, lu):
16252 """Computes the list of networks and their attributes.
16255 do_instances = query.NETQ_INST in self.requested_data
16256 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16257 do_stats = query.NETQ_STATS in self.requested_data
16258 cluster = lu.cfg.GetClusterInfo()
16260 network_to_groups = None
16261 network_to_instances = None
16264 # For NETQ_GROUP, we need to map network->[groups]
16266 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16267 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16268 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
16271 all_instances = lu.cfg.GetAllInstancesInfo()
16272 all_nodes = lu.cfg.GetAllNodesInfo()
16273 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16276 for group in all_groups.values():
16278 group_nodes = [node.name for node in all_nodes.values() if
16279 node.group == group.uuid]
16280 group_instances = [instance for instance in all_instances.values()
16281 if instance.primary_node in group_nodes]
16283 for net_uuid in group.networks.keys():
16284 if net_uuid in network_to_groups:
16285 netparams = group.networks[net_uuid]
16286 mode = netparams[constants.NIC_MODE]
16287 link = netparams[constants.NIC_LINK]
16288 info = group.name + '(' + mode + ', ' + link + ')'
16289 network_to_groups[net_uuid].append(info)
16292 for instance in group_instances:
16293 for nic in instance.nics:
16294 if nic.network == self._all_networks[net_uuid].name:
16295 network_to_instances[net_uuid].append(instance.name)
16300 for uuid, net in self._all_networks.items():
16301 if uuid in self.wanted:
16302 pool = network.AddressPool(net)
16304 "free_count": pool.GetFreeCount(),
16305 "reserved_count": pool.GetReservedCount(),
16306 "map": pool.GetMap(),
16307 "external_reservations": ", ".join(pool.GetExternalReservations()),
16310 return query.NetworkQueryData([self._all_networks[uuid]
16311 for uuid in self.wanted],
16313 network_to_instances,
16317 class LUNetworkQuery(NoHooksLU):
16318 """Logical unit for querying networks.
16323 def CheckArguments(self):
16324 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16325 self.op.output_fields, False)
16327 def ExpandNames(self):
16328 self.nq.ExpandNames(self)
16330 def Exec(self, feedback_fn):
16331 return self.nq.OldStyleQuery(self)
16335 class LUNetworkConnect(LogicalUnit):
16336 """Connect a network to a nodegroup
16339 HPATH = "network-connect"
16340 HTYPE = constants.HTYPE_NETWORK
16343 def ExpandNames(self):
16344 self.network_name = self.op.network_name
16345 self.group_name = self.op.group_name
16346 self.network_mode = self.op.network_mode
16347 self.network_link = self.op.network_link
16349 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16350 self.network = self.cfg.GetNetwork(self.network_uuid)
16351 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16352 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16354 self.needed_locks = {
16355 locking.LEVEL_INSTANCE: [],
16356 locking.LEVEL_NODEGROUP: [self.group_uuid],
16358 self.share_locks[locking.LEVEL_INSTANCE] = 1
16360 def DeclareLocks(self, level):
16361 if level == locking.LEVEL_INSTANCE:
16362 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16364 # Lock instances optimistically, needs verification once group lock has
16366 self.needed_locks[locking.LEVEL_INSTANCE] = \
16367 self.cfg.GetNodeGroupInstances(self.group_uuid)
16369 def BuildHooksEnv(self):
16371 ret["GROUP_NAME"] = self.group_name
16372 ret["GROUP_NETWORK_MODE"] = self.network_mode
16373 ret["GROUP_NETWORK_LINK"] = self.network_link
16374 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16377 def BuildHooksNodes(self):
16378 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16379 return (nodes, nodes)
16382 def CheckPrereq(self):
16383 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16386 if self.network is None:
16387 raise errors.OpPrereqError("Network %s does not exist" %
16388 self.network_name, errors.ECODE_INVAL)
16390 self.netparams = dict()
16391 self.netparams[constants.NIC_MODE] = self.network_mode
16392 self.netparams[constants.NIC_LINK] = self.network_link
16393 objects.NIC.CheckParameterSyntax(self.netparams)
16395 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16396 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16397 self.connected = False
16398 if self.network_uuid in self.group.networks:
16399 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16400 (self.network_name, self.group.name))
16401 self.connected = True
16404 pool = network.AddressPool(self.network)
16405 if self.op.conflicts_check:
16406 groupinstances = []
16407 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16408 groupinstances.append(self.cfg.GetInstanceInfo(n))
16409 instances = [(instance.name, idx, nic.ip)
16410 for instance in groupinstances
16411 for idx, nic in enumerate(instance.nics)
16412 if (not nic.network and pool._Contains(nic.ip))]
16414         self.LogWarning("The following occurrences use IPs from network %s"
16415                         " that is about to be connected to nodegroup %s: %s" %
16416 (self.network_name, self.group.name,
16418 raise errors.OpPrereqError("Conflicting IPs found."
16419 " Please remove/modify"
16420 " corresponding NICs",
16421 errors.ECODE_INVAL)
16423 def Exec(self, feedback_fn):
16427 self.group.networks[self.network_uuid] = self.netparams
16428 self.cfg.Update(self.group, feedback_fn)
16431 class LUNetworkDisconnect(LogicalUnit):
16432   """Disconnect a network from a nodegroup
16435 HPATH = "network-disconnect"
16436 HTYPE = constants.HTYPE_NETWORK
16439 def ExpandNames(self):
16440 self.network_name = self.op.network_name
16441 self.group_name = self.op.group_name
16443 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16444 self.network = self.cfg.GetNetwork(self.network_uuid)
16445 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16446 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16448 self.needed_locks = {
16449 locking.LEVEL_INSTANCE: [],
16450 locking.LEVEL_NODEGROUP: [self.group_uuid],
16452 self.share_locks[locking.LEVEL_INSTANCE] = 1
16454 def DeclareLocks(self, level):
16455 if level == locking.LEVEL_INSTANCE:
16456 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16458 # Lock instances optimistically, needs verification once group lock has
16460 self.needed_locks[locking.LEVEL_INSTANCE] = \
16461 self.cfg.GetNodeGroupInstances(self.group_uuid)
16463 def BuildHooksEnv(self):
16465 ret["GROUP_NAME"] = self.group_name
16466 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16469 def BuildHooksNodes(self):
16470 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16471 return (nodes, nodes)
16474 def CheckPrereq(self):
16475 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16478 self.connected = True
16479 if self.network_uuid not in self.group.networks:
16480 self.LogWarning("Network '%s' is"
16481 " not mapped to group '%s'" %
16482 (self.network_name, self.group.name))
16483 self.connected = False
16486 if self.op.conflicts_check:
16487 groupinstances = []
16488 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16489 groupinstances.append(self.cfg.GetInstanceInfo(n))
16490 instances = [(instance.name, idx, nic.ip)
16491 for instance in groupinstances
16492 for idx, nic in enumerate(instance.nics)
16493 if nic.network == self.network_name]
16495         self.LogWarning("The following occurrences use IPs from network %s"
16496                         " that is about to be disconnected from the nodegroup"
16498 (self.network_name, self.group.name,
16500 raise errors.OpPrereqError("Conflicting IPs."
16501 " Please remove/modify"
16502                                    " corresponding NICs",
16503 errors.ECODE_INVAL)
16505 def Exec(self, feedback_fn):
16506 if not self.connected:
16509 del self.group.networks[self.network_uuid]
16510 self.cfg.Update(self.group, feedback_fn)
16513 #: Query type implementations
16515 constants.QR_CLUSTER: _ClusterQuery,
16516 constants.QR_INSTANCE: _InstanceQuery,
16517 constants.QR_NODE: _NodeQuery,
16518 constants.QR_GROUP: _GroupQuery,
16519 constants.QR_NETWORK: _NetworkQuery,
16520 constants.QR_OS: _OsQuery,
16521 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16522 constants.QR_EXPORT: _ExportQuery,
16525 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
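# Descriptive note (not part of the original source): the assertion above
# guarantees that every query resource listed in constants.QR_VIA_OP has a
# matching implementation in _QUERY_IMPL, so _GetQueryImplementation below can
# only fail for names outside that set.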
16528 def _GetQueryImplementation(name):
16529   """Returns the implementation for a query type.
16531 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16535 return _QUERY_IMPL[name]
16537 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16538 errors.ECODE_INVAL)
16540 def _CheckForConflictingIp(lu, ip, node):
16541 """In case of conflicting ip raise error.
16544 @param ip: ip address
16546 @param node: node name
16549 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16550 if conf_net is not None:
16551 raise errors.OpPrereqError("Conflicting IP found:"
16552 " %s <> %s." % (ip, conf_net),
16553 errors.ECODE_INVAL)
16555 return (None, None)